2006-05-02 15:06:28

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 0/3] VFS changes to collapse AIO and vectored IO into single (set of) fileops.

Hi,

This series of patches collapses all the vectored IO support into
a single set of file-operation methods using aio_read/aio_write.
This work was originally suggested & started by Christoph Hellwig,
when Zach Brown tried to add vectored support for AIO.

Here is the summary:

[PATCH 1/3] Vectorize aio_read/aio_write methods

[PATCH 2/3] Remove readv/writev methods and use aio_read/aio_write
instead.

[PATCH 3/3] Zach's core aio changes to support vectored AIO.

BTW, Chuck Lever is actually re-arranging NFS DIO, AIO code to
fit into this model.

I ran various tests, including LTP, on this series. Andrew,
can you include these in the -mm tree?

Thanks,
Badari



2006-05-02 15:07:53

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 1/3] Vectorize aio_read/aio_write methods

This patch vectorizes the aio_read() and aio_write() methods to prepare
for collapsing all the vectored & aio operations into one interface,
namely aio_read()/aio_write().


Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

Documentation/filesystems/Locking | 5 +-
Documentation/filesystems/vfs.txt | 4 +-
drivers/char/raw.c | 14 -------
drivers/usb/gadget/inode.c | 71 +++++++++++++++++++++++++++-----------
fs/aio.c | 15 +++++---
fs/block_dev.c | 10 -----
fs/cifs/cifsfs.c | 6 +--
fs/ext3/file.c | 5 +-
fs/read_write.c | 20 ++++++++--
fs/reiserfs/file.c | 8 ----
fs/xfs/linux-2.6/xfs_file.c | 44 +++++++++++------------
include/linux/aio.h | 2 +
include/linux/fs.h | 10 ++---
include/net/sock.h | 1
mm/filemap.c | 39 ++++++++++----------
net/socket.c | 48 ++++++++++++-------------
16 files changed, 163 insertions(+), 139 deletions(-)

Index: linux-2.6.17-rc3/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.17-rc3.orig/Documentation/filesystems/Locking 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/Documentation/filesystems/Locking 2006-05-02 07:53:58.000000000 -0700
@@ -355,10 +355,9 @@ The last two are called only from check_
prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
- loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int,
Index: linux-2.6.17-rc3/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.17-rc3.orig/Documentation/filesystems/vfs.txt 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/Documentation/filesystems/vfs.txt 2006-05-02 07:53:58.000000000 -0700
@@ -699,9 +699,9 @@ This describes how the VFS can manipulat
struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
Index: linux-2.6.17-rc3/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.orig/drivers/char/raw.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/drivers/char/raw.c 2006-05-02 07:53:58.000000000 -0700
@@ -250,23 +250,11 @@ static ssize_t raw_file_write(struct fil
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
-
static struct file_operations raw_fops = {
.read = generic_file_read,
.aio_read = generic_file_aio_read,
.write = raw_file_write,
- .aio_write = raw_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
Index: linux-2.6.17-rc3/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/aio.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/aio.c 2006-05-02 07:53:58.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/uio.h>

#define DEBUG 0

@@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *i
ssize_t ret = 0;

do {
- ret = file->f_op->aio_read(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
/*
* Can't just depend on iocb->ki_left to determine
* whether we are done. This may have been a short read.
@@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *
ssize_t ret = 0;

do {
- ret = file->f_op->aio_write(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
Index: linux-2.6.17-rc3/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/block_dev.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/block_dev.c 2006-05-02 07:53:58.000000000 -0700
@@ -1064,14 +1064,6 @@ static ssize_t blkdev_file_write(struct
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1094,7 +1086,7 @@ const struct file_operations def_blk_fop
.read = generic_file_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
- .aio_write = blkdev_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
Index: linux-2.6.17-rc3/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/cifs/cifsfs.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/cifs/cifsfs.c 2006-05-02 07:53:58.000000000 -0700
@@ -496,13 +496,13 @@ static ssize_t cifs_file_writev(struct f
return written;
}

-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
ssize_t written;

- written = generic_file_aio_write(iocb, buf, count, pos);
+ written = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (!CIFS_I(inode)->clientCanCacheAll)
filemap_fdatawrite(inode->i_mapping);
return written;
Index: linux-2.6.17-rc3/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/ext3/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/ext3/file.c 2006-05-02 07:53:58.000000000 -0700
@@ -48,14 +48,15 @@ static int ext3_release_file (struct ino
}

static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
int err;

- ret = generic_file_aio_write(iocb, buf, count, pos);
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

/*
* Skip flushing if there was an error, or if nothing was written.
Index: linux-2.6.17-rc3/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/read_write.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/read_write.c 2006-05-02 07:53:58.000000000 -0700
@@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(str

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
@@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read);

ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
Index: linux-2.6.17-rc3/fs/reiserfs/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/reiserfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/reiserfs/file.c 2006-05-02 07:53:58.000000000 -0700
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struc
return res;
}

-static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
- size_t count, loff_t pos)
-{
- return generic_file_aio_write(iocb, buf, count, pos);
-}
-
const struct file_operations reiserfs_file_operations = {
.read = generic_file_read,
.write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_fi
.fsync = reiserfs_sync_file,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
- .aio_write = reiserfs_aio_write,
+ .aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
Index: linux-2.6.17-rc3/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/xfs/linux-2.6/xfs_file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/xfs/linux-2.6/xfs_file.c 2006-05-02 07:53:58.000000000 -0700
@@ -51,12 +51,11 @@ static struct vm_operations_struct xfs_d
STATIC inline ssize_t
__xfs_file_read(
struct kiocb *iocb,
- char __user *buf,
+ const struct iovec *iov,
+ unsigned long nr_segs,
int ioflags,
- size_t count,
loff_t pos)
{
- struct iovec iov = {buf, count};
struct file *file = iocb->ki_filp;
vnode_t *vp = vn_from_inode(file->f_dentry->d_inode);
ssize_t rval;
@@ -65,39 +64,38 @@ __xfs_file_read(

if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
- VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_READ(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_read_invis(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
__xfs_file_write(
- struct kiocb *iocb,
- const char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
+ struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ int ioflags,
+ loff_t pos)
{
- struct iovec iov = {(void __user *)buf, count};
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
vnode_t *vp = vn_from_inode(inode);
@@ -107,28 +105,28 @@ __xfs_file_write(
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;

- VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_WRITE(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_write_invis(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
Index: linux-2.6.17-rc3/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/fs.h 2006-05-02 07:53:58.000000000 -0700
@@ -1015,9 +1015,9 @@ struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1594,11 +1594,11 @@ extern int file_send_actor(read_descript
extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
- unsigned long, loff_t *);
+ unsigned long, loff_t);
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
Index: linux-2.6.17-rc3/include/net/sock.h
===================================================================
--- linux-2.6.17-rc3.orig/include/net/sock.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/net/sock.h 2006-05-02 07:53:58.000000000 -0700
@@ -659,7 +659,6 @@ struct sock_iocb {
struct sock *sk;
struct scm_cookie *scm;
struct msghdr *msg, async_msg;
- struct iovec async_iov;
struct kiocb *kiocb;
};

Index: linux-2.6.17-rc3/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.orig/mm/filemap.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/mm/filemap.c 2006-05-02 07:53:58.000000000 -0700
@@ -1096,14 +1096,12 @@ out:
EXPORT_SYMBOL(__generic_file_aio_read);

ssize_t
-generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+ return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
}
-
EXPORT_SYMBOL(generic_file_aio_read);

ssize_t
@@ -2163,22 +2161,21 @@ out:
current->backing_dev_info = NULL;
return written ? written : err;
}
-EXPORT_SYMBOL(generic_file_aio_write_nolock);

-ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- loff_t pos = *ppos;

- ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
+ ssize_t err;

err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
@@ -2186,6 +2183,7 @@ generic_file_aio_write_nolock(struct kio
}
return ret;
}
+EXPORT_SYMBOL(generic_file_aio_write_nolock);

static ssize_t
__generic_file_write_nolock(struct file *file, const struct iovec *iov,
@@ -2195,9 +2193,11 @@ __generic_file_write_nolock(struct file
ssize_t ret;

init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (ret == -EIOCBQUEUED)
+ if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}

@@ -2209,28 +2209,27 @@ generic_file_write_nolock(struct file *f
ssize_t ret;

init_sync_kiocb(&kiocb, file);
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+ kiocb.ki_pos = *ppos;
+ ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}
EXPORT_SYMBOL(generic_file_write_nolock);

-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
- &iocb->ki_pos);
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
Index: linux-2.6.17-rc3/net/socket.c
===================================================================
--- linux-2.6.17-rc3.orig/net/socket.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/net/socket.c 2006-05-02 07:53:58.000000000 -0700
@@ -96,10 +96,10 @@
#include <linux/netfilter.h>

static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
- size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct * vma);

static int sock_close(struct inode *inode, struct file *file);
@@ -700,7 +700,7 @@ static ssize_t sock_sendpage(struct file
}

static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
- char __user *ubuf, size_t size, struct sock_iocb *siocb)
+ struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -710,15 +710,13 @@ static struct sock_iocb *alloc_sock_iocb
}

siocb->kiocb = iocb;
- siocb->async_iov.iov_base = ubuf;
- siocb->async_iov.iov_len = size;
-
iocb->private = siocb;
return siocb;
}

static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -749,31 +747,33 @@ static ssize_t sock_readv(struct file *f
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}

static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -806,28 +806,28 @@ static ssize_t sock_writev(struct file *
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;

- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}


Index: linux-2.6.17-rc3/drivers/usb/gadget/inode.c
===================================================================
--- linux-2.6.17-rc3.orig/drivers/usb/gadget/inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/drivers/usb/gadget/inode.c 2006-05-02 07:53:58.000000000 -0700
@@ -528,7 +528,8 @@ struct kiocb_priv {
struct usb_request *req;
struct ep_data *epdata;
void *buf;
- char __user *ubuf;
+ struct iovec *iv;
+ unsigned long count;
unsigned actual;
};

@@ -556,18 +557,32 @@ static int ep_aio_cancel(struct kiocb *i
static ssize_t ep_aio_read_retry(struct kiocb *iocb)
{
struct kiocb_priv *priv = iocb->private;
- ssize_t status = priv->actual;
+ ssize_t len, total;

/* we "retry" to get the right mm context for this: */
- status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
- if (unlikely(0 != status))
- status = -EFAULT;
- else
- status = priv->actual;
+
+ /* copy stuff into user buffers */
+ total = priv->actual;
+ len = 0;
+ for (i=0; i < priv->count; i++) {
+ ssize_t this = min(priv->iv[i].iov_len, (size_t)total);
+
+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
+ break;
+
+ total -= this;
+ len += this;
+ if (total <= 0)
+ break;
+ }
+
+ if (unlikely(len != 0))
+ len = -EFAULT;
+
kfree(priv->buf);
kfree(priv);
aio_put_req(iocb);
- return status;
+ return len;
}

static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -615,7 +630,8 @@ ep_aio_rwtail(
char *buf,
size_t len,
struct ep_data *epdata,
- char __user *ubuf
+ const struct iovec *iv,
+ unsigned long count
)
{
struct kiocb_priv *priv = (void *) &iocb->private;
@@ -630,7 +646,8 @@ fail:
return value;
}
iocb->private = priv;
- priv->ubuf = ubuf;
+ priv->iovec = iv;
+ priv->count = count;

value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
if (unlikely(value < 0)) {
@@ -675,36 +692,52 @@ fail:
}

static ssize_t
-ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o)
+ep_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len;
+ int i = 0;
+ ssize_t ret;

if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
+
iocb->ki_retry = ep_aio_read_retry;
- return ep_aio_rwtail(iocb, buf, len, epdata, ubuf);
+ return ep_aio_rwtail(iocb, buf, len, epdata, iv, count);
}

static ssize_t
-ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o)
+ep_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len = 0;
+ int i = 0;
+ ssize_t ret;

if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN)))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
- if (unlikely(copy_from_user(buf, ubuf, len) != 0)) {
- kfree(buf);
- return -EFAULT;
+
+ for (i=0; i < count; i++) {
+ if (unlikely(copy_from_user(&buf[len], iv[i]->iov_base,
+ iv[i]->iov_len) != 0)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+ len += iv[i]->iov_len;
}
- return ep_aio_rwtail(iocb, buf, len, epdata, NULL);
+ return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
}

/*----------------------------------------------------------------------*/
Index: linux-2.6.17-rc3/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/aio.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/aio.h 2006-05-02 07:53:58.000000000 -0700
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/aio_abi.h>
+#include <linux/uio.h>

#include <asm/atomic.h>

@@ -112,6 +113,7 @@ struct kiocb {
long ki_retried; /* just for testing */
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
+ struct iovec ki_inline_vec; /* inline vector */

struct list_head ki_list; /* the aio core uses this
* for cancellation */


2006-05-02 15:08:36

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 2/3] Remove readv/writev methods and use aio_read/aio_write instead

This patch removes the readv() and writev() methods and replaces
them with the aio_read()/aio_write() methods.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
drivers/char/raw.c | 2
drivers/net/tun.c | 35 +++------------
fs/bad_inode.c | 2
fs/block_dev.c | 2
fs/cifs/cifsfs.c | 16 ------
fs/compat.c | 44 ++++---------------
fs/ext2/file.c | 2
fs/ext3/file.c | 2
fs/fat/file.c | 2
fs/fuse/dev.c | 35 +++------------
fs/hostfs/hostfs_kern.c | 2
fs/jfs/file.c | 2
fs/ntfs/file.c | 2
fs/pipe.c | 49 +++++----------------
fs/read_write.c | 101 +++++++++++++++++++++++++++++---------------
fs/read_write.h | 14 ++++++
fs/xfs/linux-2.6/xfs_file.c | 92 ----------------------------------------
include/linux/fs.h | 6 --
mm/filemap.c | 36 ---------------
net/socket.c | 40 -----------------
sound/core/pcm_native.c | 40 ++++++++---------
21 files changed, 141 insertions(+), 385 deletions(-)

Index: linux-2.6.17-rc3/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.orig/drivers/char/raw.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/drivers/char/raw.c 2006-05-02 07:54:12.000000000 -0700
@@ -258,8 +258,6 @@ static struct file_operations raw_fops =
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.owner = THIS_MODULE,
};

Index: linux-2.6.17-rc3/drivers/net/tun.c
===================================================================
--- linux-2.6.17-rc3.orig/drivers/net/tun.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/drivers/net/tun.c 2006-05-02 07:54:12.000000000 -0700
@@ -289,11 +289,10 @@ static inline size_t iov_total(const str
return len;
}

-/* Writev */
-static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
- struct tun_struct *tun = file->private_data;
+ struct tun_struct *tun = iocb->ki_filp->private_data;

if (!tun)
return -EBADFD;
@@ -303,14 +302,6 @@ static ssize_t tun_chr_writev(struct fil
return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}

-/* Write */
-static ssize_t tun_chr_write(struct file * file, const char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { (void __user *) buf, count };
- return tun_chr_writev(file, &iv, 1, pos);
-}
-
/* Put packet to the user space buffer */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
struct sk_buff *skb,
@@ -344,10 +335,10 @@ static __inline__ ssize_t tun_put_user(s
return total;
}

-/* Readv */
-static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
+ struct file *file = iocb->ki_filp;
struct tun_struct *tun = file->private_data;
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
@@ -427,14 +418,6 @@ static ssize_t tun_chr_readv(struct file
return ret;
}

-/* Read */
-static ssize_t tun_chr_read(struct file * file, char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { buf, count };
- return tun_chr_readv(file, &iv, 1, pos);
-}
-
static void tun_setup(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -762,10 +745,8 @@ static int tun_chr_close(struct inode *i
static struct file_operations tun_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = tun_chr_read,
- .readv = tun_chr_readv,
- .write = tun_chr_write,
- .writev = tun_chr_writev,
+ .aio_read = tun_chr_aio_read,
+ .aio_write = tun_chr_aio_write,
.poll = tun_chr_poll,
.ioctl = tun_chr_ioctl,
.open = tun_chr_open,
Index: linux-2.6.17-rc3/fs/bad_inode.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/bad_inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/bad_inode.c 2006-05-02 07:54:12.000000000 -0700
@@ -40,8 +40,6 @@ static const struct file_operations bad_
.aio_fsync = EIO_ERROR,
.fasync = EIO_ERROR,
.lock = EIO_ERROR,
- .readv = EIO_ERROR,
- .writev = EIO_ERROR,
.sendfile = EIO_ERROR,
.sendpage = EIO_ERROR,
.get_unmapped_area = EIO_ERROR,
Index: linux-2.6.17-rc3/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/block_dev.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/fs/block_dev.c 2006-05-02 07:54:12.000000000 -0700
@@ -1093,8 +1093,6 @@ const struct file_operations def_blk_fop
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .readv = generic_file_readv,
- .writev = generic_file_write_nolock,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc3/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/cifs/cifsfs.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/fs/cifs/cifsfs.c 2006-05-02 07:54:12.000000000 -0700
@@ -484,18 +484,6 @@ cifs_get_sb(struct file_system_type *fs_
return sb;
}

-static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct inode *inode = file->f_dentry->d_inode;
- ssize_t written;
-
- written = generic_file_writev(file, iov, nr_segs, ppos);
- if (!CIFS_I(inode)->clientCanCacheAll)
- filemap_fdatawrite(inode->i_mapping);
- return written;
-}
-
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -581,8 +569,6 @@ struct inode_operations cifs_symlink_ino
const struct file_operations cifs_file_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
@@ -624,8 +610,6 @@ const struct file_operations cifs_file_d
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
Index: linux-2.6.17-rc3/fs/compat.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/compat.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/compat.c 2006-05-02 07:54:12.000000000 -0700
@@ -55,6 +55,8 @@

extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);

+#include "read_write.h"
+
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
@@ -1139,9 +1141,6 @@ static ssize_t compat_do_readv_writev(in
const struct compat_iovec __user *uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov=iovstack, *vector;
@@ -1224,39 +1223,18 @@ static ssize_t compat_do_readv_writev(in
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -1284,7 +1262,7 @@ compat_sys_readv(unsigned long fd, const
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
goto out;

ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
@@ -1307,7 +1285,7 @@ compat_sys_writev(unsigned long fd, cons
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
goto out;

ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
Index: linux-2.6.17-rc3/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/ext2/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/ext2/file.c 2006-05-02 07:54:12.000000000 -0700
@@ -50,8 +50,6 @@ const struct file_operations ext2_file_o
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc3/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/ext3/file.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/fs/ext3/file.c 2006-05-02 07:54:12.000000000 -0700
@@ -112,8 +112,6 @@ const struct file_operations ext3_file_o
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.ioctl = ext3_ioctl,
.mmap = generic_file_mmap,
.open = generic_file_open,
Index: linux-2.6.17-rc3/fs/fat/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/fat/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/fat/file.c 2006-05-02 07:54:12.000000000 -0700
@@ -116,8 +116,6 @@ const struct file_operations fat_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3/fs/fuse/dev.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/fuse/dev.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/fuse/dev.c 2006-05-02 07:54:12.000000000 -0700
@@ -585,14 +585,15 @@ static void request_wait(struct fuse_con
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_copy_state cs;
unsigned reqsize;
+ struct file *file = iocb->ki_filp;
struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
return -EPERM;
@@ -658,15 +659,6 @@ static ssize_t fuse_dev_readv(struct fil
return err;
}

-static ssize_t fuse_dev_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = buf;
- return fuse_dev_readv(file, &iov, 1, off);
-}
-
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
@@ -711,15 +703,15 @@ static int copy_out_args(struct fuse_cop
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
unsigned nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(file);
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
if (!fc)
return -EPERM;

@@ -779,15 +771,6 @@ static ssize_t fuse_dev_writev(struct fi
return err;
}

-static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = (char __user *) buf;
- return fuse_dev_writev(file, &iov, 1, off);
-}
-
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -921,10 +904,8 @@ static int fuse_dev_fasync(int fd, struc
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = fuse_dev_read,
- .readv = fuse_dev_readv,
- .write = fuse_dev_write,
- .writev = fuse_dev_writev,
+ .aio_read = fuse_dev_read,
+ .aio_write = fuse_dev_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
Index: linux-2.6.17-rc3/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/hostfs/hostfs_kern.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/hostfs/hostfs_kern.c 2006-05-02 07:54:12.000000000 -0700
@@ -390,8 +390,6 @@ static const struct file_operations host
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.write = generic_file_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
Index: linux-2.6.17-rc3/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/jfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/jfs/file.c 2006-05-02 07:54:12.000000000 -0700
@@ -108,8 +108,6 @@ const struct file_operations jfs_file_op
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.fsync = jfs_fsync,
.release = jfs_release,
Index: linux-2.6.17-rc3/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/ntfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/ntfs/file.c 2006-05-02 07:54:12.000000000 -0700
@@ -2298,11 +2298,9 @@ const struct file_operations ntfs_file_o
.llseek = generic_file_llseek, /* Seek inside file. */
.read = generic_file_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
.aio_write = ntfs_file_aio_write, /* Async write to file. */
- .writev = ntfs_file_writev, /* Write to file. */
/*.release = ,*/ /* Last file is closed. See
fs/ext2/file.c::
ext2_release_file() for
Index: linux-2.6.17-rc3/fs/pipe.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/pipe.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/pipe.c 2006-05-02 07:54:12.000000000 -0700
@@ -147,9 +147,10 @@ static struct pipe_buf_operations anon_p
};

static ssize_t
-pipe_readv(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_read(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
int do_wakeup;
@@ -248,17 +249,10 @@ pipe_readv(struct file *filp, const stru
}

static ssize_t
-pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = buf, .iov_len = count };
-
- return pipe_readv(filp, &iov, 1, ppos);
-}
-
-static ssize_t
-pipe_writev(struct file *filp, const struct iovec *_iov,
+pipe_write(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t *ppos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
ssize_t ret;
@@ -404,15 +398,6 @@ out:
}

static ssize_t
-pipe_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return pipe_writev(filp, &iov, 1, ppos);
-}
-
-static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
return -EBADF;
@@ -630,8 +615,7 @@ pipe_rdwr_open(struct inode *inode, stru
*/
const struct file_operations read_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -643,8 +627,7 @@ const struct file_operations read_fifo_f
const struct file_operations write_fifo_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -654,10 +637,8 @@ const struct file_operations write_fifo_

const struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
@@ -667,8 +648,7 @@ const struct file_operations rdwr_fifo_f

static struct file_operations read_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -680,8 +660,7 @@ static struct file_operations read_pipe_
static struct file_operations write_pipe_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -691,10 +670,8 @@ static struct file_operations write_pipe

static struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
Index: linux-2.6.17-rc3/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/read_write.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/fs/read_write.c 2006-05-02 07:54:12.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -450,6 +451,63 @@ unsigned long iov_shorten(struct iovec *

EXPORT_SYMBOL(iov_shorten);

+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
+
+ for (;;) {
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
+ wait_on_retry_sync_kiocb(&kiocb);
+ }
+
+ if (ret == -EIOCBQUEUED)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
+/* Do it by hand, with file-ops */
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+{
+ struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+
+ while (nr_segs > 0) {
+ void __user * base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ nr = fn(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -457,12 +515,9 @@ static ssize_t do_readv_writev(int type,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *vector;
+ struct iovec *iov = iovstack;
ssize_t ret;
int seg;
io_fn_t fn;
@@ -532,39 +587,17 @@ static ssize_t do_readv_writev(int type,
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -585,7 +618,7 @@ ssize_t vfs_readv(struct file *file, con
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;

return do_readv_writev(READ, file, vec, vlen, pos);
@@ -598,7 +631,7 @@ ssize_t vfs_writev(struct file *file, co
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;

return do_readv_writev(WRITE, file, vec, vlen, pos);
Index: linux-2.6.17-rc3/fs/read_write.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc3/fs/read_write.h 2006-05-02 07:54:12.000000000 -0700
@@ -0,0 +1,14 @@
+/*
+ * This file is only for sharing some helpers from read_write.c with compat.c.
+ * Don't use anywhere else.
+ */
+
+
+typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
Index: linux-2.6.17-rc3/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/xfs/linux-2.6/xfs_file.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/fs/xfs/linux-2.6/xfs_file.c 2006-05-02 07:54:12.000000000 -0700
@@ -129,94 +129,6 @@ xfs_file_aio_write_invis(
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

-STATIC inline ssize_t
-__xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC inline ssize_t
-__xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
STATIC ssize_t
xfs_file_sendfile(
struct file *filp,
@@ -577,8 +489,6 @@ const struct file_operations xfs_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv,
- .writev = xfs_file_writev,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.sendfile = xfs_file_sendfile,
@@ -601,8 +511,6 @@ const struct file_operations xfs_invis_f
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv_invis,
- .writev = xfs_file_writev_invis,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.sendfile = xfs_file_sendfile_invis,
Index: linux-2.6.17-rc3/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/fs.h 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/fs.h 2006-05-02 07:54:12.000000000 -0700
@@ -1031,8 +1031,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -1624,10 +1622,6 @@ extern long do_splice_direct(struct file

extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
-ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
Index: linux-2.6.17-rc3/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.orig/mm/filemap.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/mm/filemap.c 2006-05-02 07:54:12.000000000 -0700
@@ -2267,42 +2267,6 @@ ssize_t generic_file_write(struct file *
}
EXPORT_SYMBOL(generic_file_write);

-ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-EXPORT_SYMBOL(generic_file_readv);
-
-ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_writev);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc3/net/socket.c
===================================================================
--- linux-2.6.17-rc3.orig/net/socket.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/net/socket.c 2006-05-02 07:54:12.000000000 -0700
@@ -112,10 +112,6 @@ static long compat_sock_ioctl(struct fil
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);

@@ -138,8 +134,6 @@ static struct file_operations socket_fil
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
- .readv = sock_readv,
- .writev = sock_writev,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
@@ -736,23 +730,6 @@ static ssize_t do_sock_read(struct msghd
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}

-static ssize_t sock_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb iocb;
- struct sock_iocb siocb;
- struct msghdr msg;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -795,23 +772,6 @@ static ssize_t do_sock_write(struct msgh
return __sock_sendmsg(iocb, sock, msg, size);
}

-static ssize_t sock_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct msghdr msg;
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
Index: linux-2.6.17-rc3/sound/core/pcm_native.c
===================================================================
--- linux-2.6.17-rc3.orig/sound/core/pcm_native.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/sound/core/pcm_native.c 2006-05-02 07:54:12.000000000 -0700
@@ -2819,8 +2819,8 @@ static ssize_t snd_pcm_write(struct file
return result;
}

-static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)

{
struct snd_pcm_file *pcm_file;
@@ -2831,22 +2831,22 @@ static ssize_t snd_pcm_readv(struct file
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, return -ENXIO);
runtime = substream->runtime;
if (runtime->status->state == SNDRV_PCM_STATE_OPEN)
return -EBADFD;
- if (count > 1024 || count != runtime->channels)
+ if (nr_segs > 1024 || nr_segs != runtime->channels)
return -EINVAL;
- if (!frame_aligned(runtime, _vector->iov_len))
+ if (!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_readv(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -2854,8 +2854,8 @@ static ssize_t snd_pcm_readv(struct file
return result;
}

-static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct snd_pcm_file *pcm_file;
struct snd_pcm_substream *substream;
@@ -2865,7 +2865,7 @@ static ssize_t snd_pcm_writev(struct fil
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, result = -ENXIO; goto end);
runtime = substream->runtime;
@@ -2873,17 +2873,17 @@ static ssize_t snd_pcm_writev(struct fil
result = -EBADFD;
goto end;
}
- if (count > 128 || count != runtime->channels ||
- !frame_aligned(runtime, _vector->iov_len)) {
+ if (nr_segs > 128 || nr_segs != runtime->channels ||
+ !frame_aligned(runtime, iov->iov_len)) {
result = -EINVAL;
goto end;
}
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_writev(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -3389,7 +3389,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.write = snd_pcm_write,
- .writev = snd_pcm_writev,
+ .aio_write = snd_pcm_aio_write,
.open = snd_pcm_playback_open,
.release = snd_pcm_release,
.poll = snd_pcm_playback_poll,
@@ -3401,7 +3401,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.read = snd_pcm_read,
- .readv = snd_pcm_readv,
+ .aio_read = snd_pcm_aio_read,
.open = snd_pcm_capture_open,
.release = snd_pcm_release,
.poll = snd_pcm_capture_poll,


2006-05-02 15:10:09

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 3/3] Core aio changes to support vectored AIO.

This work was initially done by Zach Brown to add support for
vectored aio. These are the core changes for AIO to support
IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.

I made a few extra changes beyond Zach's work. They are:
- took out aio_pread/aio_pwrite and made them
a special case of the vectored support
- added a single inlined vector to save on kmalloc()
for a simple aio_read/aio_write
- kiocb->ki_left always indicates the amount of
IO that needs to be done. Made sure that this gets
set in the sync case also, so that we don't need
to loop over iovecs to figure out the IO size all
the time.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Zach Brown <[email protected]>
Acked-by: Benjamin LaHaise <[email protected]>

fs/aio.c | 165 +++++++++++++++++++++++++++++++++---------------
fs/read_write.c | 127 +++++++++++++++++++++---------------
include/linux/aio.h | 4 +
include/linux/aio_abi.h | 2
include/linux/fs.h | 5 +
5 files changed, 199 insertions(+), 104 deletions(-)

Index: linux-2.6.17-rc3/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/aio.c 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/fs/aio.c 2006-05-02 07:54:22.000000000 -0700
@@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_
req->ki_retry = NULL;
req->ki_dtor = NULL;
req->private = NULL;
+ req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);

/* Check if the completion queue has enough free space to
@@ -460,6 +461,8 @@ static inline void really_put_req(struct

if (req->ki_dtor)
req->ki_dtor(req);
+ if (req->ki_iovec != &req->ki_inline_vec)
+ kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
ctx->reqs_active--;

@@ -1301,69 +1304,63 @@ asmlinkage long sys_io_destroy(aio_conte
return -EINVAL;
}

-/*
- * aio_p{read,write} are the default ki_retry methods for
- * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially
- * multiple calls to f_op->aio_read(). They loop around partial progress
- * instead of returning -EIOCBRETRY because they don't have the means to call
- * kick_iocb().
- */
-static ssize_t aio_pread(struct kiocb *iocb)
+static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret = 0;
+ struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];

- do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ BUG_ON(ret <= 0);

- ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- /*
- * Can't just depend on iocb->ki_left to determine
- * whether we are done. This may have been a short read.
- */
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
+ while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
+ ssize_t this = min(iov->iov_len, (size_t)ret);
+ iov->iov_base += this;
+ iov->iov_len -= this;
+ iocb->ki_left -= this;
+ ret -= this;
+ if (iov->iov_len == 0) {
+ iocb->ki_cur_seg++;
+ iov++;
}
+ }

- /*
- * For pipes and sockets we return once we have some data; for
- * regular files we retry till we complete the entire read or
- * find that we can't read any more data (e.g short reads).
- */
- } while (ret > 0 && iocb->ki_left > 0 &&
- !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
-
- /* This means we must have transferred all that we could */
- /* No need to retry anymore */
- if ((ret == 0) || (iocb->ki_left == 0))
- ret = iocb->ki_nbytes - iocb->ki_left;
-
- return ret;
+ /* the caller should not have done more io than what fit in
+ * the remaining iovecs */
+ BUG_ON(ret > 0 && iocb->ki_left == 0);
}

-/* see aio_pread() */
-static ssize_t aio_pwrite(struct kiocb *iocb)
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
ssize_t ret = 0;
+ unsigned short opcode;
+
+ if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
+ (iocb->ki_opcode == IOCB_CMD_PREAD)) {
+ rw_op = file->f_op->aio_read;
+ opcode = IOCB_CMD_PREADV;
+ } else {
+ rw_op = file->f_op->aio_write;
+ opcode = IOCB_CMD_PWRITEV;
+ }

do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
+ iocb->ki_nr_segs - iocb->ki_cur_seg,
+ iocb->ki_pos);
+ if (ret > 0)
+ aio_advance_iovec(iocb, ret);

- ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
- }
- } while (ret > 0 && iocb->ki_left > 0);
+ /* retry all partial writes. retry partial reads as long as it's a
+ * regular file. */
+ } while (ret > 0 && iocb->ki_left > 0 &&
+ (opcode == IOCB_CMD_PWRITEV ||
+ (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));

+ /* This means we must have transferred all that we could */
+ /* No need to retry anymore */
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;

@@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *i
return ret;
}

+static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb)
+{
+ ssize_t ret;
+
+ ret = rw_copy_check_uvector((struct iovec __user *)kiocb->ki_buf,
+ kiocb->ki_nbytes, 1,
+ &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+ if (ret < 0)
+ goto out;
+
+ kiocb->ki_nr_segs = kiocb->ki_nbytes;
+ kiocb->ki_cur_seg = 0;
+ /* ki_nbytes/left now reflect bytes instead of segs */
+ kiocb->ki_nbytes = ret;
+ kiocb->ki_left = ret;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+{
+ kiocb->ki_iovec = &kiocb->ki_inline_vec;
+ kiocb->ki_iovec->iov_base = kiocb->ki_buf;
+ kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_nr_segs = 1;
+ kiocb->ki_cur_seg = 0;
+ kiocb->ki_nbytes = kiocb->ki_left;
+ return 0;
+}
+
/*
* aio_setup_iocb:
* Performs the initial checks and aio retry method
@@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_READ);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_read)
- kiocb->ki_retry = aio_pread;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PWRITE:
ret = -EBADF;
@@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_WRITE);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_write)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PREADV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ break;
+ ret = security_file_permission(file, MAY_READ);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_read)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PWRITEV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ break;
+ ret = security_file_permission(file, MAY_WRITE);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_write)
- kiocb->ki_retry = aio_pwrite;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_FDSYNC:
ret = -EINVAL;
Index: linux-2.6.17-rc3/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/aio.h 2006-05-02 07:53:58.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/aio.h 2006-05-02 07:54:22.000000000 -0700
@@ -7,6 +7,7 @@
#include <linux/uio.h>

#include <asm/atomic.h>
+#include <linux/uio.h>

#define AIO_MAXSEGS 4
#define AIO_KIOGRP_NR_ATOMIC 8
@@ -114,6 +115,9 @@ struct kiocb {
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
struct iovec ki_inline_vec; /* inline vector */
+ struct iovec *ki_iovec;
+ unsigned long ki_nr_segs;
+ unsigned long ki_cur_seg;

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc3/include/linux/aio_abi.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/aio_abi.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/aio_abi.h 2006-05-02 07:54:22.000000000 -0700
@@ -41,6 +41,8 @@ enum {
* IOCB_CMD_POLL = 5,
*/
IOCB_CMD_NOOP = 6,
+ IOCB_CMD_PREADV = 7,
+ IOCB_CMD_PWRITEV = 8,
};

/* read() from /dev/aio returns these structures. */
Index: linux-2.6.17-rc3/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/read_write.c 2006-05-02 07:54:12.000000000 -0700
+++ linux-2.6.17-rc3/fs/read_write.c 2006-05-02 07:54:22.000000000 -0700
@@ -508,6 +508,74 @@ ssize_t do_loop_readv_writev(struct file
return ret;
}

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer)
+ {
+ unsigned long seg;
+ ssize_t ret;
+ struct iovec *iov = fast_pointer;
+
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
+ if (nr_segs == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
+ if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_segs > fast_segs) {
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * According to the Single Unix Specification we should return EINVAL
+ * if an element length is < 0 when cast to ssize_t or if the
+ * total length would overflow the ssize_t return value of the
+ * system call.
+ */
+ ret = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
+
+ /* see if we're about to use an invalid len or if
+ * it's about to overflow ssize_t */
+ if (len < 0 || (ret + len < ret)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret += len;
+ }
+out:
+ *ret_pointer = iov;
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -519,64 +587,20 @@ static ssize_t do_readv_writev(int type,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
ssize_t ret;
- int seg;
io_fn_t fn;
iov_fn_t fnv;

- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
- ret = 0;
- if (nr_segs == 0)
+ if (!file->f_op) {
+ ret = -EINVAL;
goto out;
-
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
- ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV)
- goto out;
- if (!file->f_op)
- goto out;
- if (nr_segs > UIO_FASTIOV) {
- ret = -ENOMEM;
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
- if (!iov)
- goto out;
}
- ret = -EFAULT;
- if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector)))
- goto out;

- /*
- * Single unix specification:
- * We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t. The total length is fitting an ssize_t
- *
- * Be careful here because iov_len is a size_t not an ssize_t
- */
- tot_len = 0;
- ret = -EINVAL;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
-
- if (len < 0) /* size_t not fitting an ssize_t .. */
- goto out;
- if (unlikely(!access_ok(vrfy_dir(type), buf, len)))
- goto Efault;
- tot_len += len;
- if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */
- goto out;
- }
- if (tot_len == 0) {
- ret = 0;
+ ret = rw_copy_check_uvector(uvector, nr_segs, ARRAY_SIZE(iovstack),
+ iovstack, &iov);
+ if (ret <= 0)
goto out;
- }

+ tot_len = ret;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
@@ -608,9 +632,6 @@ out:
fsnotify_modify(file->f_dentry);
}
return ret;
-Efault:
- ret = -EFAULT;
- goto out;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
Index: linux-2.6.17-rc3/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/fs.h 2006-05-02 07:54:12.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/fs.h 2006-05-02 07:54:22.000000000 -0700
@@ -1068,6 +1068,11 @@ struct inode_operations {

struct seq_file;

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer);
+
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,


2006-05-02 15:19:49

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

nfs: update nfs_file_read and nfs_file_write to the new vectored API

From: Chuck Lever <[email protected]>

Migrate NFS client's read and write file operations to use the new vectored
I/O API. Note that the direct I/O path supports only standard non-vectored
I/O for now.

Also fix some tab damage in the definition of nfs_file_operations, and
update dprintk's to reflect the true size of loff_t.

Test plan:
Standard read- and write- intensive workloads.

Signed-off-by: Chuck Lever <[email protected]>
---

fs/nfs/direct.c | 24 ++++++++++++++++++------
fs/nfs/file.c | 43 ++++++++++++++++++++++++-------------------
include/linux/nfs_fs.h | 8 ++++----
3 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3c72b0c..e5707b3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -745,8 +745,8 @@ static ssize_t nfs_direct_write(struct k
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * @count: number of bytes to read
+ * @iov: vector of user buffers into which to read data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
@@ -763,19 +763,25 @@ static ssize_t nfs_direct_write(struct k
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval = -EINVAL;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
if (count < 0)
goto out;
retval = -EFAULT;
@@ -807,8 +813,8 @@ out:
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * @count: number of bytes to write
+ * @iov: vector of user buffers from which to write data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -829,19 +835,25 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index fade02c..4fea6aa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,8 +41,8 @@ static int nfs_file_release(struct inode
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
-static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
@@ -53,8 +53,8 @@ const struct file_operations nfs_file_op
.llseek = nfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -212,26 +212,30 @@ nfs_file_flush(struct file *file)
return status;
}

-static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, buf, count, pos);
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+ dfprintk(VFS, "nfs: read(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long) count, (long long) pos);

result = nfs_revalidate_file(inode, iocb->ki_filp);
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
- result = generic_file_aio_read(iocb, buf, count, pos);
+ result = generic_file_aio_read(iocb, iov, nr_segs, pos);
return result;
}

@@ -343,24 +347,25 @@ struct address_space_operations nfs_file
#endif
};

-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, buf, count, pos);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+ dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- inode->i_ino, (unsigned long) count, (unsigned long) pos);
+ inode->i_ino, (unsigned long) count, (long long) pos);

result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -380,7 +385,7 @@ nfs_file_write(struct kiocb *iocb, const
goto out;

nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
- result = generic_file_aio_write(iocb, buf, count, pos);
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
out:
return result;

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c71227d..f590a87 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -359,10 +359,10 @@ extern int nfs3_removexattr (struct dent
*/
extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
- size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos);
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);

/*
* linux/fs/nfs/dir.c


Attachments:
04-nfs-vector-io.diff (8.04 kB)

2006-05-02 15:34:51

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Tue, 2006-05-02 at 11:20 -0400, Chuck Lever wrote:
> If you apply this one, then the NFS client no longer builds.
>
> I think you might need to stub out vectored direct I/O support for the
> NFS client temporarily with something like the attached patch.
>

Yuck. I meant to send this one (with your temporary fix) - which is
the one I was testing earlier.

Thanks,
Badari

This patch vectorizes aio_read() and aio_write() methods to prepare
for collapsing all aio & vectored operations into one interface -
which is aio_read()/aio_write().


Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

Documentation/filesystems/Locking | 5 +-
Documentation/filesystems/vfs.txt | 4 +-
drivers/char/raw.c | 14 -------
drivers/usb/gadget/inode.c | 71 +++++++++++++++++++++++++++-----------
fs/aio.c | 15 +++++---
fs/block_dev.c | 10 -----
fs/cifs/cifsfs.c | 6 +--
fs/ext3/file.c | 5 +-
fs/nfs/direct.c | 24 +++++++++---
fs/nfs/file.c | 43 ++++++++++++-----------
fs/read_write.c | 20 ++++++++--
fs/reiserfs/file.c | 8 ----
fs/xfs/linux-2.6/xfs_file.c | 44 +++++++++++------------
include/linux/aio.h | 2 +
include/linux/fs.h | 10 ++---
include/linux/nfs_fs.h | 8 ++--
include/net/sock.h | 1
mm/filemap.c | 39 ++++++++++----------
net/socket.c | 48 ++++++++++++-------------
19 files changed, 209 insertions(+), 168 deletions(-)

Index: linux-2.6.17-rc3/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.17-rc3.orig/Documentation/filesystems/Locking 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/Documentation/filesystems/Locking 2006-05-02 07:53:58.000000000 -0700
@@ -355,10 +355,9 @@ The last two are called only from check_
prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
- loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int,
Index: linux-2.6.17-rc3/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.17-rc3.orig/Documentation/filesystems/vfs.txt 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/Documentation/filesystems/vfs.txt 2006-05-02 07:53:58.000000000 -0700
@@ -699,9 +699,9 @@ This describes how the VFS can manipulat
struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
Index: linux-2.6.17-rc3/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.orig/drivers/char/raw.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/drivers/char/raw.c 2006-05-02 08:28:50.000000000 -0700
@@ -250,23 +250,11 @@ static ssize_t raw_file_write(struct fil
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
-
static struct file_operations raw_fops = {
.read = generic_file_read,
.aio_read = generic_file_aio_read,
.write = raw_file_write,
- .aio_write = raw_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
Index: linux-2.6.17-rc3/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/aio.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/aio.c 2006-05-02 08:28:47.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/uio.h>

#define DEBUG 0

@@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *i
ssize_t ret = 0;

do {
- ret = file->f_op->aio_read(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
/*
* Can't just depend on iocb->ki_left to determine
* whether we are done. This may have been a short read.
@@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *
ssize_t ret = 0;

do {
- ret = file->f_op->aio_write(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
Index: linux-2.6.17-rc3/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/block_dev.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/block_dev.c 2006-05-02 08:28:50.000000000 -0700
@@ -1064,14 +1064,6 @@ static ssize_t blkdev_file_write(struct
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1094,7 +1086,7 @@ const struct file_operations def_blk_fop
.read = generic_file_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
- .aio_write = blkdev_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
Index: linux-2.6.17-rc3/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/cifs/cifsfs.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/cifs/cifsfs.c 2006-05-02 08:28:50.000000000 -0700
@@ -496,13 +496,13 @@ static ssize_t cifs_file_writev(struct f
return written;
}

-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
ssize_t written;

- written = generic_file_aio_write(iocb, buf, count, pos);
+ written = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (!CIFS_I(inode)->clientCanCacheAll)
filemap_fdatawrite(inode->i_mapping);
return written;
Index: linux-2.6.17-rc3/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/ext3/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/ext3/file.c 2006-05-02 08:28:50.000000000 -0700
@@ -48,14 +48,15 @@ static int ext3_release_file (struct ino
}

static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
int err;

- ret = generic_file_aio_write(iocb, buf, count, pos);
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

/*
* Skip flushing if there was an error, or if nothing was written.
Index: linux-2.6.17-rc3/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/read_write.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/read_write.c 2006-05-02 08:28:50.000000000 -0700
@@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(str

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
@@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read);

ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
Index: linux-2.6.17-rc3/fs/reiserfs/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/reiserfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/reiserfs/file.c 2006-05-02 07:53:58.000000000 -0700
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struc
return res;
}

-static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
- size_t count, loff_t pos)
-{
- return generic_file_aio_write(iocb, buf, count, pos);
-}
-
const struct file_operations reiserfs_file_operations = {
.read = generic_file_read,
.write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_fi
.fsync = reiserfs_sync_file,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
- .aio_write = reiserfs_aio_write,
+ .aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
Index: linux-2.6.17-rc3/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/xfs/linux-2.6/xfs_file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/xfs/linux-2.6/xfs_file.c 2006-05-02 08:28:50.000000000 -0700
@@ -51,12 +51,11 @@ static struct vm_operations_struct xfs_d
STATIC inline ssize_t
__xfs_file_read(
struct kiocb *iocb,
- char __user *buf,
+ const struct iovec *iov,
+ unsigned long nr_segs,
int ioflags,
- size_t count,
loff_t pos)
{
- struct iovec iov = {buf, count};
struct file *file = iocb->ki_filp;
vnode_t *vp = vn_from_inode(file->f_dentry->d_inode);
ssize_t rval;
@@ -65,39 +64,38 @@ __xfs_file_read(

if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
- VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_READ(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_read_invis(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
__xfs_file_write(
- struct kiocb *iocb,
- const char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
+ struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ int ioflags,
+ loff_t pos)
{
- struct iovec iov = {(void __user *)buf, count};
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
vnode_t *vp = vn_from_inode(inode);
@@ -107,28 +105,28 @@ __xfs_file_write(
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;

- VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_WRITE(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_write_invis(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
Index: linux-2.6.17-rc3/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/fs.h 2006-05-02 08:28:50.000000000 -0700
@@ -1015,9 +1015,9 @@ struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1594,11 +1594,11 @@ extern int file_send_actor(read_descript
extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
- unsigned long, loff_t *);
+ unsigned long, loff_t);
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
Index: linux-2.6.17-rc3/include/net/sock.h
===================================================================
--- linux-2.6.17-rc3.orig/include/net/sock.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/net/sock.h 2006-05-02 07:53:58.000000000 -0700
@@ -659,7 +659,6 @@ struct sock_iocb {
struct sock *sk;
struct scm_cookie *scm;
struct msghdr *msg, async_msg;
- struct iovec async_iov;
struct kiocb *kiocb;
};

Index: linux-2.6.17-rc3/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.orig/mm/filemap.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/mm/filemap.c 2006-05-02 08:28:50.000000000 -0700
@@ -1096,14 +1096,12 @@ out:
EXPORT_SYMBOL(__generic_file_aio_read);

ssize_t
-generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+ return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
}
-
EXPORT_SYMBOL(generic_file_aio_read);

ssize_t
@@ -2163,22 +2161,21 @@ out:
current->backing_dev_info = NULL;
return written ? written : err;
}
-EXPORT_SYMBOL(generic_file_aio_write_nolock);

-ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- loff_t pos = *ppos;

- ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
+ ssize_t err;

err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
@@ -2186,6 +2183,7 @@ generic_file_aio_write_nolock(struct kio
}
return ret;
}
+EXPORT_SYMBOL(generic_file_aio_write_nolock);

static ssize_t
__generic_file_write_nolock(struct file *file, const struct iovec *iov,
@@ -2195,9 +2193,11 @@ __generic_file_write_nolock(struct file
ssize_t ret;

init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (ret == -EIOCBQUEUED)
+ if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}

@@ -2209,28 +2209,27 @@ generic_file_write_nolock(struct file *f
ssize_t ret;

init_sync_kiocb(&kiocb, file);
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+ kiocb.ki_pos = *ppos;
+ ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}
EXPORT_SYMBOL(generic_file_write_nolock);

-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
- &iocb->ki_pos);
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
Index: linux-2.6.17-rc3/net/socket.c
===================================================================
--- linux-2.6.17-rc3.orig/net/socket.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/net/socket.c 2006-05-02 08:28:50.000000000 -0700
@@ -96,10 +96,10 @@
#include <linux/netfilter.h>

static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
- size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct * vma);

static int sock_close(struct inode *inode, struct file *file);
@@ -700,7 +700,7 @@ static ssize_t sock_sendpage(struct file
}

static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
- char __user *ubuf, size_t size, struct sock_iocb *siocb)
+ struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -710,15 +710,13 @@ static struct sock_iocb *alloc_sock_iocb
}

siocb->kiocb = iocb;
- siocb->async_iov.iov_base = ubuf;
- siocb->async_iov.iov_len = size;
-
iocb->private = siocb;
return siocb;
}

static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -749,31 +747,33 @@ static ssize_t sock_readv(struct file *f
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}

static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -806,28 +806,28 @@ static ssize_t sock_writev(struct file *
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;

- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}


Index: linux-2.6.17-rc3/drivers/usb/gadget/inode.c
===================================================================
--- linux-2.6.17-rc3.orig/drivers/usb/gadget/inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/drivers/usb/gadget/inode.c 2006-05-02 07:53:58.000000000 -0700
@@ -528,7 +528,8 @@ struct kiocb_priv {
struct usb_request *req;
struct ep_data *epdata;
void *buf;
- char __user *ubuf;
+ struct iovec *iv;
+ unsigned long count;
unsigned actual;
};

@@ -556,18 +557,32 @@ static int ep_aio_cancel(struct kiocb *i
static ssize_t ep_aio_read_retry(struct kiocb *iocb)
{
struct kiocb_priv *priv = iocb->private;
- ssize_t status = priv->actual;
+ ssize_t len, total;

/* we "retry" to get the right mm context for this: */
- status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
- if (unlikely(0 != status))
- status = -EFAULT;
- else
- status = priv->actual;
+
+ /* copy stuff into user buffers */
+ total = priv->actual;
+ len = 0;
+ for (i=0; i < priv->count; i++) {
+ ssize_t this = min(priv->iv[i].iov_len, (size_t)total);
+
+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
+ break;
+
+ total -= this;
+ len += this;
+ if (total <= 0)
+ break;
+ }
+
+ if (unlikely(len != 0))
+ len = -EFAULT;
+
kfree(priv->buf);
kfree(priv);
aio_put_req(iocb);
- return status;
+ return len;
}

static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -615,7 +630,8 @@ ep_aio_rwtail(
char *buf,
size_t len,
struct ep_data *epdata,
- char __user *ubuf
+ const struct iovec *iv,
+ unsigned long count
)
{
struct kiocb_priv *priv = (void *) &iocb->private;
@@ -630,7 +646,8 @@ fail:
return value;
}
iocb->private = priv;
- priv->ubuf = ubuf;
+ priv->iovec = iv;
+ priv->count = count;

value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
if (unlikely(value < 0)) {
@@ -675,36 +692,52 @@ fail:
}

static ssize_t
-ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o)
+ep_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len;
+ int i = 0;
+ ssize_t ret;

if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
+
iocb->ki_retry = ep_aio_read_retry;
- return ep_aio_rwtail(iocb, buf, len, epdata, ubuf);
+ return ep_aio_rwtail(iocb, buf, len, epdata, iv, count);
}

static ssize_t
-ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o)
+ep_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len = 0;
+ int i = 0;
+ ssize_t ret;

if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN)))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
- if (unlikely(copy_from_user(buf, ubuf, len) != 0)) {
- kfree(buf);
- return -EFAULT;
+
+ for (i=0; i < count; i++) {
+ if (unlikely(copy_from_user(&buf[len], iv[i]->iov_base,
+ iv[i]->iov_len) != 0)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+ len += iv[i]->iov_len;
}
- return ep_aio_rwtail(iocb, buf, len, epdata, NULL);
+ return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
}

/*----------------------------------------------------------------------*/
Index: linux-2.6.17-rc3/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/aio.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/aio.h 2006-05-02 08:28:47.000000000 -0700
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/aio_abi.h>
+#include <linux/uio.h>

#include <asm/atomic.h>

@@ -112,6 +113,7 @@ struct kiocb {
long ki_retried; /* just for testing */
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
+ struct iovec ki_inline_vec; /* inline vector */

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc3/fs/nfs/direct.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/nfs/direct.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/nfs/direct.c 2006-05-02 08:31:58.000000000 -0700
@@ -745,8 +745,8 @@ static ssize_t nfs_direct_write(struct k
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * @count: number of bytes to read
+ * @iov: vector of user buffers into which to read data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
@@ -763,19 +763,25 @@ static ssize_t nfs_direct_write(struct k
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval = -EINVAL;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
if (count < 0)
goto out;
retval = -EFAULT;
@@ -807,8 +813,8 @@ out:
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * @count: number of bytes to write
+ * @iov: vector of user buffers from which to write data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -829,19 +835,25 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
Index: linux-2.6.17-rc3/fs/nfs/file.c
===================================================================
--- linux-2.6.17-rc3.orig/fs/nfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/fs/nfs/file.c 2006-05-02 08:31:58.000000000 -0700
@@ -41,8 +41,8 @@ static int nfs_file_release(struct inode
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
-static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
@@ -53,8 +53,8 @@ const struct file_operations nfs_file_op
.llseek = nfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -212,26 +212,30 @@ nfs_file_flush(struct file *file)
return status;
}

-static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, buf, count, pos);
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+ dfprintk(VFS, "nfs: read(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long) count, (long long) pos);

result = nfs_revalidate_file(inode, iocb->ki_filp);
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
- result = generic_file_aio_read(iocb, buf, count, pos);
+ result = generic_file_aio_read(iocb, iov, nr_segs, pos);
return result;
}

@@ -343,24 +347,25 @@ struct address_space_operations nfs_file
#endif
};

-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, buf, count, pos);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+ dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- inode->i_ino, (unsigned long) count, (unsigned long) pos);
+ inode->i_ino, (unsigned long) count, (long long) pos);

result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -380,7 +385,7 @@ nfs_file_write(struct kiocb *iocb, const
goto out;

nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
- result = generic_file_aio_write(iocb, buf, count, pos);
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
out:
return result;

Index: linux-2.6.17-rc3/include/linux/nfs_fs.h
===================================================================
--- linux-2.6.17-rc3.orig/include/linux/nfs_fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3/include/linux/nfs_fs.h 2006-05-02 08:31:58.000000000 -0700
@@ -359,10 +359,10 @@ extern int nfs3_removexattr (struct dent
*/
extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
- size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos);
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);

/*
* linux/fs/nfs/dir.c


2006-05-09 18:02:40

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 0/3] VFS changes to collapse AIO and vectored IO into single (set of) fileops.

Hi,

This series of patches collapses all the vectored IO support into
a single set of file-operation methods using aio_read/aio_write.
This work was originally suggested & started by Christoph Hellwig,
when Zach Brown tried to add vectored support for AIO.

Here is the summary:

[PATCH 1/3] Vectorize aio_read/aio_write methods

[PATCH 2/3] Remove readv/writev methods and use aio_read/aio_write
instead.

[PATCH 3/3] Zach's core aio changes to support vectored AIO.

BTW, Chuck Lever is actually re-arranging the NFS DIO and AIO code to
fit into this model.

Thanks to Chuck Lever and Shaggy for tracking down the latest
set of issues :)

I ran various tests, including LTP, on this series. Andrew,
can you include these in the -mm tree?

Thanks,
Badari


2006-05-09 18:06:06

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 1/3] Vectorize aio_read/aio_write methods

This patch vectorizes the aio_read() and aio_write() methods to prepare
for collapsing all aio & vectored operations into one interface,
namely aio_read()/aio_write().


Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

Documentation/filesystems/Locking | 5 +-
Documentation/filesystems/vfs.txt | 4 +-
drivers/char/raw.c | 14 -------
drivers/usb/gadget/inode.c | 71 +++++++++++++++++++++++++++-----------
fs/aio.c | 15 +++++---
fs/block_dev.c | 10 -----
fs/cifs/cifsfs.c | 6 +--
fs/ext3/file.c | 5 +-
fs/nfs/direct.c | 24 +++++++++---
fs/nfs/file.c | 43 ++++++++++++-----------
fs/ntfs/file.c | 8 +---
fs/ocfs2/file.c | 28 ++++++--------
fs/read_write.c | 20 ++++++++--
fs/reiserfs/file.c | 8 ----
fs/xfs/linux-2.6/xfs_file.c | 44 +++++++++++------------
include/linux/aio.h | 2 +
include/linux/fs.h | 10 ++---
include/linux/nfs_fs.h | 8 ++--
include/net/sock.h | 1
mm/filemap.c | 38 +++++++++-----------
net/socket.c | 48 ++++++++++++-------------
21 files changed, 224 insertions(+), 188 deletions(-)

Index: linux-2.6.17-rc3.save/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.17-rc3.save.orig/Documentation/filesystems/Locking 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/Documentation/filesystems/Locking 2006-05-02 07:53:58.000000000 -0700
@@ -355,10 +355,9 @@ The last two are called only from check_
prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
- loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int,
Index: linux-2.6.17-rc3.save/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.17-rc3.save.orig/Documentation/filesystems/vfs.txt 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/Documentation/filesystems/vfs.txt 2006-05-02 07:53:58.000000000 -0700
@@ -699,9 +699,9 @@ This describes how the VFS can manipulat
struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-09 10:58:58.000000000 -0700
@@ -250,23 +250,11 @@ static ssize_t raw_file_write(struct fil
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
-
static struct file_operations raw_fops = {
.read = generic_file_read,
.aio_read = generic_file_aio_read,
.write = raw_file_write,
- .aio_write = raw_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
Index: linux-2.6.17-rc3.save/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/aio.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/aio.c 2006-05-09 10:58:53.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/uio.h>

#define DEBUG 0

@@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *i
ssize_t ret = 0;

do {
- ret = file->f_op->aio_read(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
/*
* Can't just depend on iocb->ki_left to determine
* whether we are done. This may have been a short read.
@@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *
ssize_t ret = 0;

do {
- ret = file->f_op->aio_write(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-09 10:58:58.000000000 -0700
@@ -1064,14 +1064,6 @@ static ssize_t blkdev_file_write(struct
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1094,7 +1086,7 @@ const struct file_operations def_blk_fop
.read = generic_file_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
- .aio_write = blkdev_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
Index: linux-2.6.17-rc3.save/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/cifs/cifsfs.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/cifs/cifsfs.c 2006-05-09 10:58:58.000000000 -0700
@@ -496,13 +496,13 @@ static ssize_t cifs_file_writev(struct f
return written;
}

-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
ssize_t written;

- written = generic_file_aio_write(iocb, buf, count, pos);
+ written = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (!CIFS_I(inode)->clientCanCacheAll)
filemap_fdatawrite(inode->i_mapping);
return written;
Index: linux-2.6.17-rc3.save/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext3/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext3/file.c 2006-05-09 10:58:58.000000000 -0700
@@ -48,14 +48,15 @@ static int ext3_release_file (struct ino
}

static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
int err;

- ret = generic_file_aio_write(iocb, buf, count, pos);
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

/*
* Skip flushing if there was an error, or if nothing was written.
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-09 10:58:58.000000000 -0700
@@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(str

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
@@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read);

ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
Index: linux-2.6.17-rc3.save/fs/reiserfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/reiserfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/reiserfs/file.c 2006-05-02 07:53:58.000000000 -0700
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struc
return res;
}

-static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
- size_t count, loff_t pos)
-{
- return generic_file_aio_write(iocb, buf, count, pos);
-}
-
const struct file_operations reiserfs_file_operations = {
.read = generic_file_read,
.write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_fi
.fsync = reiserfs_sync_file,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
- .aio_write = reiserfs_aio_write,
+ .aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
Index: linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/xfs/linux-2.6/xfs_file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c 2006-05-09 10:58:58.000000000 -0700
@@ -51,12 +51,11 @@ static struct vm_operations_struct xfs_d
STATIC inline ssize_t
__xfs_file_read(
struct kiocb *iocb,
- char __user *buf,
+ const struct iovec *iov,
+ unsigned long nr_segs,
int ioflags,
- size_t count,
loff_t pos)
{
- struct iovec iov = {buf, count};
struct file *file = iocb->ki_filp;
vnode_t *vp = vn_from_inode(file->f_dentry->d_inode);
ssize_t rval;
@@ -65,39 +64,38 @@ __xfs_file_read(

if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
- VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_READ(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_read_invis(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
__xfs_file_write(
- struct kiocb *iocb,
- const char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
+ struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ int ioflags,
+ loff_t pos)
{
- struct iovec iov = {(void __user *)buf, count};
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
vnode_t *vp = vn_from_inode(inode);
@@ -107,28 +105,28 @@ __xfs_file_write(
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;

- VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_WRITE(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_write_invis(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-09 10:58:58.000000000 -0700
@@ -1015,9 +1015,9 @@ struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1594,11 +1594,11 @@ extern int file_send_actor(read_descript
extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
- unsigned long, loff_t *);
+ unsigned long, loff_t);
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
Index: linux-2.6.17-rc3.save/include/net/sock.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/net/sock.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/net/sock.h 2006-05-02 07:53:58.000000000 -0700
@@ -659,7 +659,6 @@ struct sock_iocb {
struct sock *sk;
struct scm_cookie *scm;
struct msghdr *msg, async_msg;
- struct iovec async_iov;
struct kiocb *kiocb;
};

Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-09 10:58:58.000000000 -0700
@@ -1096,14 +1096,12 @@ out:
EXPORT_SYMBOL(__generic_file_aio_read);

ssize_t
-generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+ return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
}
-
EXPORT_SYMBOL(generic_file_aio_read);

ssize_t
@@ -2163,22 +2161,21 @@ out:
current->backing_dev_info = NULL;
return written ? written : err;
}
-EXPORT_SYMBOL(generic_file_aio_write_nolock);

-ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- loff_t pos = *ppos;

- ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
+ ssize_t err;

err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
@@ -2186,6 +2183,7 @@ generic_file_aio_write_nolock(struct kio
}
return ret;
}
+EXPORT_SYMBOL(generic_file_aio_write_nolock);

static ssize_t
__generic_file_write_nolock(struct file *file, const struct iovec *iov,
@@ -2195,8 +2193,9 @@ __generic_file_write_nolock(struct file
ssize_t ret;

init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (ret == -EIOCBQUEUED)
+ if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
return ret;
}
@@ -2209,28 +2208,27 @@ generic_file_write_nolock(struct file *f
ssize_t ret;

init_sync_kiocb(&kiocb, file);
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+ kiocb.ki_pos = *ppos;
+ ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}
EXPORT_SYMBOL(generic_file_write_nolock);

-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
- &iocb->ki_pos);
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
Index: linux-2.6.17-rc3.save/net/socket.c
===================================================================
--- linux-2.6.17-rc3.save.orig/net/socket.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/net/socket.c 2006-05-09 10:58:58.000000000 -0700
@@ -96,10 +96,10 @@
#include <linux/netfilter.h>

static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
- size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct * vma);

static int sock_close(struct inode *inode, struct file *file);
@@ -700,7 +700,7 @@ static ssize_t sock_sendpage(struct file
}

static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
- char __user *ubuf, size_t size, struct sock_iocb *siocb)
+ struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -710,15 +710,13 @@ static struct sock_iocb *alloc_sock_iocb
}

siocb->kiocb = iocb;
- siocb->async_iov.iov_base = ubuf;
- siocb->async_iov.iov_len = size;
-
iocb->private = siocb;
return siocb;
}

static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -749,31 +747,33 @@ static ssize_t sock_readv(struct file *f
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}

static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -806,28 +806,28 @@ static ssize_t sock_writev(struct file *
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;

- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}


Index: linux-2.6.17-rc3.save/drivers/usb/gadget/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/usb/gadget/inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/usb/gadget/inode.c 2006-05-02 07:53:58.000000000 -0700
@@ -528,7 +528,8 @@ struct kiocb_priv {
struct usb_request *req;
struct ep_data *epdata;
void *buf;
- char __user *ubuf;
+ struct iovec *iv;
+ unsigned long count;
unsigned actual;
};

@@ -556,18 +557,32 @@ static int ep_aio_cancel(struct kiocb *i
static ssize_t ep_aio_read_retry(struct kiocb *iocb)
{
struct kiocb_priv *priv = iocb->private;
- ssize_t status = priv->actual;
+ ssize_t len, total;

/* we "retry" to get the right mm context for this: */
- status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
- if (unlikely(0 != status))
- status = -EFAULT;
- else
- status = priv->actual;
+
+ /* copy stuff into user buffers */
+ total = priv->actual;
+ len = 0;
+ for (i=0; i < priv->count; i++) {
+ ssize_t this = min(priv->iv[i].iov_len, (size_t)total);
+
+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
+ break;
+
+ total -= this;
+ len += this;
+ if (total <= 0)
+ break;
+ }
+
+ if (unlikely(len != 0))
+ len = -EFAULT;
+
kfree(priv->buf);
kfree(priv);
aio_put_req(iocb);
- return status;
+ return len;
}

static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -615,7 +630,8 @@ ep_aio_rwtail(
char *buf,
size_t len,
struct ep_data *epdata,
- char __user *ubuf
+ const struct iovec *iv,
+ unsigned long count
)
{
struct kiocb_priv *priv = (void *) &iocb->private;
@@ -630,7 +646,8 @@ fail:
return value;
}
iocb->private = priv;
- priv->ubuf = ubuf;
+ priv->iovec = iv;
+ priv->count = count;

value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
if (unlikely(value < 0)) {
@@ -675,36 +692,52 @@ fail:
}

static ssize_t
-ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o)
+ep_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len;
+ int i = 0;
+ ssize_t ret;

if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
+
iocb->ki_retry = ep_aio_read_retry;
- return ep_aio_rwtail(iocb, buf, len, epdata, ubuf);
+ return ep_aio_rwtail(iocb, buf, len, epdata, iv, count);
}

static ssize_t
-ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o)
+ep_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len = 0;
+ int i = 0;
+ ssize_t ret;

if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN)))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
- if (unlikely(copy_from_user(buf, ubuf, len) != 0)) {
- kfree(buf);
- return -EFAULT;
+
+ for (i=0; i < count; i++) {
+ if (unlikely(copy_from_user(&buf[len], iv[i]->iov_base,
+ iv[i]->iov_len) != 0)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+ len += iv[i]->iov_len;
}
- return ep_aio_rwtail(iocb, buf, len, epdata, NULL);
+ return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
}

/*----------------------------------------------------------------------*/
Index: linux-2.6.17-rc3.save/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/aio.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/aio.h 2006-05-09 10:58:53.000000000 -0700
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/aio_abi.h>
+#include <linux/uio.h>

#include <asm/atomic.h>

@@ -112,6 +113,7 @@ struct kiocb {
long ki_retried; /* just for testing */
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
+ struct iovec ki_inline_vec; /* inline vector */

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc3.save/fs/nfs/direct.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/nfs/direct.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/nfs/direct.c 2006-05-02 08:31:58.000000000 -0700
@@ -745,8 +745,8 @@ static ssize_t nfs_direct_write(struct k
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * @count: number of bytes to read
+ * @iov: vector of user buffers into which to read data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
@@ -763,19 +763,25 @@ static ssize_t nfs_direct_write(struct k
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval = -EINVAL;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
if (count < 0)
goto out;
retval = -EFAULT;
@@ -807,8 +813,8 @@ out:
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * @count: number of bytes to write
+ * @iov: vector of user buffers from which to write data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -829,19 +835,25 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
Index: linux-2.6.17-rc3.save/fs/nfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/nfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/nfs/file.c 2006-05-02 08:31:58.000000000 -0700
@@ -41,8 +41,8 @@ static int nfs_file_release(struct inode
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
-static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
@@ -53,8 +53,8 @@ const struct file_operations nfs_file_op
.llseek = nfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -212,26 +212,30 @@ nfs_file_flush(struct file *file)
return status;
}

-static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, buf, count, pos);
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+ dfprintk(VFS, "nfs: read(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long) count, (long long) pos);

result = nfs_revalidate_file(inode, iocb->ki_filp);
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
- result = generic_file_aio_read(iocb, buf, count, pos);
+ result = generic_file_aio_read(iocb, iov, nr_segs, pos);
return result;
}

@@ -343,24 +347,25 @@ struct address_space_operations nfs_file
#endif
};

-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, buf, count, pos);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+ dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- inode->i_ino, (unsigned long) count, (unsigned long) pos);
+ inode->i_ino, (unsigned long) count, (long long) pos);

result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -380,7 +385,7 @@ nfs_file_write(struct kiocb *iocb, const
goto out;

nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
- result = generic_file_aio_write(iocb, buf, count, pos);
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
out:
return result;

Index: linux-2.6.17-rc3.save/include/linux/nfs_fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/nfs_fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/nfs_fs.h 2006-05-02 08:31:58.000000000 -0700
@@ -359,10 +359,10 @@ extern int nfs3_removexattr (struct dent
*/
extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
- size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos);
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);

/*
* linux/fs/nfs/dir.c
Index: linux-2.6.17-rc3.save/fs/ocfs2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ocfs2/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ocfs2/file.c 2006-05-05 13:36:49.000000000 -0700
@@ -929,25 +929,23 @@ static inline int ocfs2_write_should_rem
}

static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
u32 clusters;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
loff_t newsize, saved_pos;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

/* happy write of zero bytes */
- if (count == 0)
+ if (iocb->ki_left == 0)
return 0;

if (!inode) {
@@ -1016,7 +1014,7 @@ static ssize_t ocfs2_file_aio_write(stru
} else {
saved_pos = iocb->ki_pos;
}
- newsize = count + saved_pos;
+ newsize = iocb->ki_left + saved_pos;

mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
(long long) saved_pos, (long long) newsize,
@@ -1059,7 +1057,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* Fill any holes which would've been created by this
* write. If we're O_APPEND, this will wind up
* (correctly) being a noop. */
- ret = ocfs2_zero_extend(inode, (u64) newsize - count);
+ ret = ocfs2_zero_extend(inode, (u64) newsize - iocb->ki_left);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1075,7 +1073,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb);

- ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);

/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -1109,16 +1107,16 @@ out:
}

static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
int ret = 0, rw_level = -1, have_alloc_sem = 0;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

@@ -1146,7 +1144,7 @@ static ssize_t ocfs2_file_aio_read(struc
ocfs2_iocb_set_rw_locked(iocb);
}

- ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
+ ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
if (ret == -EINVAL)
mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");

Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-02 08:28:50.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-09 10:58:58.000000000 -0700
@@ -2174,20 +2174,18 @@ out:
/**
* ntfs_file_aio_write -
*/
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = sync_page_range(inode, mapping, pos, ret);


2006-05-09 18:06:45

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 2/3] Remove readv/writev methods and use aio_read/aio_write instead

This patch removes the readv() and writev() file_operations methods and
converts their users to the vectored aio_read()/aio_write() methods instead.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

drivers/char/raw.c | 2
drivers/net/tun.c | 35 +++------------
fs/bad_inode.c | 2
fs/block_dev.c | 2
fs/cifs/cifsfs.c | 16 ------
fs/compat.c | 44 ++++---------------
fs/ext2/file.c | 2
fs/ext3/file.c | 2
fs/fat/file.c | 2
fs/fuse/dev.c | 35 +++------------
fs/hostfs/hostfs_kern.c | 2
fs/jfs/file.c | 2
fs/ntfs/file.c | 2
fs/pipe.c | 49 +++++----------------
fs/read_write.c | 101 +++++++++++++++++++++++++++++---------------
fs/read_write.h | 14 ++++++
fs/xfs/linux-2.6/xfs_file.c | 92 ----------------------------------------
include/linux/fs.h | 6 --
mm/filemap.c | 36 ---------------
net/socket.c | 40 -----------------
sound/core/pcm_native.c | 40 ++++++++---------
21 files changed, 141 insertions(+), 385 deletions(-)

Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-09 08:08:28.000000000 -0700
@@ -258,8 +258,6 @@ static struct file_operations raw_fops =
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.owner = THIS_MODULE,
};

Index: linux-2.6.17-rc3.save/drivers/net/tun.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/net/tun.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/net/tun.c 2006-05-09 08:08:28.000000000 -0700
@@ -289,11 +289,10 @@ static inline size_t iov_total(const str
return len;
}

-/* Writev */
-static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
- struct tun_struct *tun = file->private_data;
+ struct tun_struct *tun = iocb->ki_filp->private_data;

if (!tun)
return -EBADFD;
@@ -303,14 +302,6 @@ static ssize_t tun_chr_writev(struct fil
return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}

-/* Write */
-static ssize_t tun_chr_write(struct file * file, const char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { (void __user *) buf, count };
- return tun_chr_writev(file, &iv, 1, pos);
-}
-
/* Put packet to the user space buffer */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
struct sk_buff *skb,
@@ -344,10 +335,10 @@ static __inline__ ssize_t tun_put_user(s
return total;
}

-/* Readv */
-static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
+ struct file *file = iocb->ki_filp;
struct tun_struct *tun = file->private_data;
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
@@ -427,14 +418,6 @@ static ssize_t tun_chr_readv(struct file
return ret;
}

-/* Read */
-static ssize_t tun_chr_read(struct file * file, char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { buf, count };
- return tun_chr_readv(file, &iv, 1, pos);
-}
-
static void tun_setup(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -762,10 +745,8 @@ static int tun_chr_close(struct inode *i
static struct file_operations tun_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = tun_chr_read,
- .readv = tun_chr_readv,
- .write = tun_chr_write,
- .writev = tun_chr_writev,
+ .aio_read = tun_chr_aio_read,
+ .aio_write = tun_chr_aio_write,
.poll = tun_chr_poll,
.ioctl = tun_chr_ioctl,
.open = tun_chr_open,
Index: linux-2.6.17-rc3.save/fs/bad_inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/bad_inode.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/bad_inode.c 2006-05-09 08:08:28.000000000 -0700
@@ -40,8 +40,6 @@ static const struct file_operations bad_
.aio_fsync = EIO_ERROR,
.fasync = EIO_ERROR,
.lock = EIO_ERROR,
- .readv = EIO_ERROR,
- .writev = EIO_ERROR,
.sendfile = EIO_ERROR,
.sendpage = EIO_ERROR,
.get_unmapped_area = EIO_ERROR,
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-09 08:08:28.000000000 -0700
@@ -1093,8 +1093,6 @@ const struct file_operations def_blk_fop
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .readv = generic_file_readv,
- .writev = generic_file_write_nolock,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc3.save/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/cifs/cifsfs.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/cifs/cifsfs.c 2006-05-09 08:08:28.000000000 -0700
@@ -484,18 +484,6 @@ cifs_get_sb(struct file_system_type *fs_
return sb;
}

-static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct inode *inode = file->f_dentry->d_inode;
- ssize_t written;
-
- written = generic_file_writev(file, iov, nr_segs, ppos);
- if (!CIFS_I(inode)->clientCanCacheAll)
- filemap_fdatawrite(inode->i_mapping);
- return written;
-}
-
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -581,8 +569,6 @@ struct inode_operations cifs_symlink_ino
const struct file_operations cifs_file_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
@@ -624,8 +610,6 @@ const struct file_operations cifs_file_d
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
Index: linux-2.6.17-rc3.save/fs/compat.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/compat.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/compat.c 2006-05-09 08:08:28.000000000 -0700
@@ -55,6 +55,8 @@

extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);

+#include "read_write.h"
+
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
@@ -1139,9 +1141,6 @@ static ssize_t compat_do_readv_writev(in
const struct compat_iovec __user *uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov=iovstack, *vector;
@@ -1224,39 +1223,18 @@ static ssize_t compat_do_readv_writev(in
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -1284,7 +1262,7 @@ compat_sys_readv(unsigned long fd, const
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
goto out;

ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
@@ -1307,7 +1285,7 @@ compat_sys_writev(unsigned long fd, cons
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
goto out;

ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
Index: linux-2.6.17-rc3.save/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext2/file.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext2/file.c 2006-05-09 08:08:28.000000000 -0700
@@ -50,8 +50,6 @@ const struct file_operations ext2_file_o
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc3.save/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext3/file.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext3/file.c 2006-05-09 08:08:28.000000000 -0700
@@ -112,8 +112,6 @@ const struct file_operations ext3_file_o
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.ioctl = ext3_ioctl,
.mmap = generic_file_mmap,
.open = generic_file_open,
Index: linux-2.6.17-rc3.save/fs/fat/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fat/file.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fat/file.c 2006-05-09 08:08:28.000000000 -0700
@@ -116,8 +116,6 @@ const struct file_operations fat_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/fuse/dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fuse/dev.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fuse/dev.c 2006-05-09 08:08:28.000000000 -0700
@@ -585,14 +585,15 @@ static void request_wait(struct fuse_con
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_copy_state cs;
unsigned reqsize;
+ struct file *file = iocb->ki_filp;
struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
return -EPERM;
@@ -658,15 +659,6 @@ static ssize_t fuse_dev_readv(struct fil
return err;
}

-static ssize_t fuse_dev_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = buf;
- return fuse_dev_readv(file, &iov, 1, off);
-}
-
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
@@ -711,15 +703,15 @@ static int copy_out_args(struct fuse_cop
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
unsigned nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(file);
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
if (!fc)
return -EPERM;

@@ -779,15 +771,6 @@ static ssize_t fuse_dev_writev(struct fi
return err;
}

-static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = (char __user *) buf;
- return fuse_dev_writev(file, &iov, 1, off);
-}
-
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -921,10 +904,8 @@ static int fuse_dev_fasync(int fd, struc
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = fuse_dev_read,
- .readv = fuse_dev_readv,
- .write = fuse_dev_write,
- .writev = fuse_dev_writev,
+ .aio_read = fuse_dev_read,
+ .aio_write = fuse_dev_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
Index: linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hostfs/hostfs_kern.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c 2006-05-09 08:08:28.000000000 -0700
@@ -390,8 +390,6 @@ static const struct file_operations host
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.write = generic_file_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
Index: linux-2.6.17-rc3.save/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jfs/file.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jfs/file.c 2006-05-09 08:08:28.000000000 -0700
@@ -108,8 +108,6 @@ const struct file_operations jfs_file_op
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.fsync = jfs_fsync,
.release = jfs_release,
Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-09 08:08:28.000000000 -0700
@@ -2296,11 +2296,9 @@ const struct file_operations ntfs_file_o
.llseek = generic_file_llseek, /* Seek inside file. */
.read = generic_file_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
.aio_write = ntfs_file_aio_write, /* Async write to file. */
- .writev = ntfs_file_writev, /* Write to file. */
/*.release = ,*/ /* Last file is closed. See
fs/ext2/file.c::
ext2_release_file() for
Index: linux-2.6.17-rc3.save/fs/pipe.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/pipe.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/pipe.c 2006-05-09 08:08:28.000000000 -0700
@@ -147,9 +147,10 @@ static struct pipe_buf_operations anon_p
};

static ssize_t
-pipe_readv(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_read(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
int do_wakeup;
@@ -248,17 +249,10 @@ pipe_readv(struct file *filp, const stru
}

static ssize_t
-pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = buf, .iov_len = count };
-
- return pipe_readv(filp, &iov, 1, ppos);
-}
-
-static ssize_t
-pipe_writev(struct file *filp, const struct iovec *_iov,
+pipe_write(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t *ppos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
ssize_t ret;
@@ -404,15 +398,6 @@ out:
}

static ssize_t
-pipe_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return pipe_writev(filp, &iov, 1, ppos);
-}
-
-static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
return -EBADF;
@@ -630,8 +615,7 @@ pipe_rdwr_open(struct inode *inode, stru
*/
const struct file_operations read_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -643,8 +627,7 @@ const struct file_operations read_fifo_f
const struct file_operations write_fifo_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -654,10 +637,8 @@ const struct file_operations write_fifo_

const struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
@@ -667,8 +648,7 @@ const struct file_operations rdwr_fifo_f

static struct file_operations read_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -680,8 +660,7 @@ static struct file_operations read_pipe_
static struct file_operations write_pipe_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -691,10 +670,8 @@ static struct file_operations write_pipe

static struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-09 10:58:53.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -450,6 +451,63 @@ unsigned long iov_shorten(struct iovec *

EXPORT_SYMBOL(iov_shorten);

+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
+
+ for (;;) {
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
+ wait_on_retry_sync_kiocb(&kiocb);
+ }
+
+ if (ret == -EIOCBQUEUED)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
+/* Do it by hand, with file-ops */
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+{
+ struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+
+ while (nr_segs > 0) {
+ void __user * base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ nr = fn(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -457,12 +515,9 @@ static ssize_t do_readv_writev(int type,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *vector;
+ struct iovec *iov = iovstack;
ssize_t ret;
int seg;
io_fn_t fn;
@@ -532,39 +587,17 @@ static ssize_t do_readv_writev(int type,
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -585,7 +618,7 @@ ssize_t vfs_readv(struct file *file, con
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;

return do_readv_writev(READ, file, vec, vlen, pos);
@@ -598,7 +631,7 @@ ssize_t vfs_writev(struct file *file, co
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;

return do_readv_writev(WRITE, file, vec, vlen, pos);
Index: linux-2.6.17-rc3.save/fs/read_write.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc3.save/fs/read_write.h 2006-05-09 08:08:28.000000000 -0700
@@ -0,0 +1,14 @@
+/*
+ * This file is only for sharing some helpers from read_write.c with compat.c.
+ * Don't use anywhere else.
+ */
+
+
+typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
Index: linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/xfs/linux-2.6/xfs_file.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c 2006-05-09 08:08:28.000000000 -0700
@@ -129,94 +129,6 @@ xfs_file_aio_write_invis(
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

-STATIC inline ssize_t
-__xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC inline ssize_t
-__xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
STATIC ssize_t
xfs_file_sendfile(
struct file *filp,
@@ -577,8 +489,6 @@ const struct file_operations xfs_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv,
- .writev = xfs_file_writev,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.sendfile = xfs_file_sendfile,
@@ -601,8 +511,6 @@ const struct file_operations xfs_invis_f
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv_invis,
- .writev = xfs_file_writev_invis,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.sendfile = xfs_file_sendfile_invis,
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-09 10:58:53.000000000 -0700
@@ -1031,8 +1031,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -1624,10 +1622,6 @@ extern long do_splice_direct(struct file

extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
-ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-05-08 15:04:57.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-09 08:08:28.000000000 -0700
@@ -2266,42 +2266,6 @@ ssize_t generic_file_write(struct file *
}
EXPORT_SYMBOL(generic_file_write);

-ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-EXPORT_SYMBOL(generic_file_readv);
-
-ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_writev);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc3.save/net/socket.c
===================================================================
--- linux-2.6.17-rc3.save.orig/net/socket.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/net/socket.c 2006-05-09 08:08:28.000000000 -0700
@@ -112,10 +112,6 @@ static long compat_sock_ioctl(struct fil
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);

@@ -138,8 +134,6 @@ static struct file_operations socket_fil
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
- .readv = sock_readv,
- .writev = sock_writev,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
@@ -736,23 +730,6 @@ static ssize_t do_sock_read(struct msghd
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}

-static ssize_t sock_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb iocb;
- struct sock_iocb siocb;
- struct msghdr msg;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -795,23 +772,6 @@ static ssize_t do_sock_write(struct msgh
return __sock_sendmsg(iocb, sock, msg, size);
}

-static ssize_t sock_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct msghdr msg;
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
Index: linux-2.6.17-rc3.save/sound/core/pcm_native.c
===================================================================
--- linux-2.6.17-rc3.save.orig/sound/core/pcm_native.c 2006-05-05 13:53:05.000000000 -0700
+++ linux-2.6.17-rc3.save/sound/core/pcm_native.c 2006-05-09 08:08:28.000000000 -0700
@@ -2819,8 +2819,8 @@ static ssize_t snd_pcm_write(struct file
return result;
}

-static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)

{
struct snd_pcm_file *pcm_file;
@@ -2831,22 +2831,22 @@ static ssize_t snd_pcm_readv(struct file
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, return -ENXIO);
runtime = substream->runtime;
if (runtime->status->state == SNDRV_PCM_STATE_OPEN)
return -EBADFD;
- if (count > 1024 || count != runtime->channels)
+ if (nr_segs > 1024 || nr_segs != runtime->channels)
return -EINVAL;
- if (!frame_aligned(runtime, _vector->iov_len))
+ if (!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_readv(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -2854,8 +2854,8 @@ static ssize_t snd_pcm_readv(struct file
return result;
}

-static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct snd_pcm_file *pcm_file;
struct snd_pcm_substream *substream;
@@ -2865,7 +2865,7 @@ static ssize_t snd_pcm_writev(struct fil
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, result = -ENXIO; goto end);
runtime = substream->runtime;
@@ -2873,17 +2873,17 @@ static ssize_t snd_pcm_writev(struct fil
result = -EBADFD;
goto end;
}
- if (count > 128 || count != runtime->channels ||
- !frame_aligned(runtime, _vector->iov_len)) {
+ if (nr_segs > 128 || nr_segs != runtime->channels ||
+ !frame_aligned(runtime, iov->iov_len)) {
result = -EINVAL;
goto end;
}
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_writev(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -3389,7 +3389,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.write = snd_pcm_write,
- .writev = snd_pcm_writev,
+ .aio_write = snd_pcm_aio_write,
.open = snd_pcm_playback_open,
.release = snd_pcm_release,
.poll = snd_pcm_playback_poll,
@@ -3401,7 +3401,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.read = snd_pcm_read,
- .readv = snd_pcm_readv,
+ .aio_read = snd_pcm_aio_read,
.open = snd_pcm_capture_open,
.release = snd_pcm_release,
.poll = snd_pcm_capture_poll,


2006-05-09 18:07:34

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 3/3] Zach's core aio changes to support vectored AIO

This work was initially done by Zach Brown to add support for
vectored aio. These are the core changes for AIO to support
IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.

I made a few extra changes beyond Zach's work. They are:
- took out aio_pread/aio_pwrite and made them
a special case into vectored support
- added single inlined vector to save on kmalloc()
for a simple aio_read/aio_write
- kiocb->ki_left always indicates the amount of
IO that still needs to be done. Made sure that this gets
set in the sync case also, so that we don't need
to loop over iovecs to figure out IO size all
the time.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Zach Brown <[email protected]>
Acked-by: Benjamin LaHaise <[email protected]>

fs/aio.c | 165 +++++++++++++++++++++++++++++++++---------------
fs/read_write.c | 127 +++++++++++++++++++++---------------
include/linux/aio.h | 4 +
include/linux/aio_abi.h | 2
include/linux/fs.h | 5 +
5 files changed, 199 insertions(+), 104 deletions(-)

Index: linux-2.6.17-rc3.save/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/aio.c 2006-05-05 13:53:03.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/aio.c 2006-05-09 08:08:36.000000000 -0700
@@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_
req->ki_retry = NULL;
req->ki_dtor = NULL;
req->private = NULL;
+ req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);

/* Check if the completion queue has enough free space to
@@ -460,6 +461,8 @@ static inline void really_put_req(struct

if (req->ki_dtor)
req->ki_dtor(req);
+ if (req->ki_iovec != &req->ki_inline_vec)
+ kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
ctx->reqs_active--;

@@ -1301,69 +1304,63 @@ asmlinkage long sys_io_destroy(aio_conte
return -EINVAL;
}

-/*
- * aio_p{read,write} are the default ki_retry methods for
- * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially
- * multiple calls to f_op->aio_read(). They loop around partial progress
- * instead of returning -EIOCBRETRY because they don't have the means to call
- * kick_iocb().
- */
-static ssize_t aio_pread(struct kiocb *iocb)
+static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret = 0;
+ struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];

- do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ BUG_ON(ret <= 0);

- ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- /*
- * Can't just depend on iocb->ki_left to determine
- * whether we are done. This may have been a short read.
- */
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
+ while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
+ ssize_t this = min(iov->iov_len, (size_t)ret);
+ iov->iov_base += this;
+ iov->iov_len -= this;
+ iocb->ki_left -= this;
+ ret -= this;
+ if (iov->iov_len == 0) {
+ iocb->ki_cur_seg++;
+ iov++;
}
+ }

- /*
- * For pipes and sockets we return once we have some data; for
- * regular files we retry till we complete the entire read or
- * find that we can't read any more data (e.g short reads).
- */
- } while (ret > 0 && iocb->ki_left > 0 &&
- !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
-
- /* This means we must have transferred all that we could */
- /* No need to retry anymore */
- if ((ret == 0) || (iocb->ki_left == 0))
- ret = iocb->ki_nbytes - iocb->ki_left;
-
- return ret;
+ /* the caller should not have done more io than what fit in
+ * the remaining iovecs */
+ BUG_ON(ret > 0 && iocb->ki_left == 0);
}

-/* see aio_pread() */
-static ssize_t aio_pwrite(struct kiocb *iocb)
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
ssize_t ret = 0;
+ unsigned short opcode;
+
+ if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
+ (iocb->ki_opcode == IOCB_CMD_PREAD)) {
+ rw_op = file->f_op->aio_read;
+ opcode = IOCB_CMD_PREADV;
+ } else {
+ rw_op = file->f_op->aio_write;
+ opcode = IOCB_CMD_PWRITEV;
+ }

do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
+ iocb->ki_nr_segs - iocb->ki_cur_seg,
+ iocb->ki_pos);
+ if (ret > 0)
+ aio_advance_iovec(iocb, ret);

- ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
- }
- } while (ret > 0 && iocb->ki_left > 0);
+ /* retry all partial writes. retry partial reads as long as it's a
+ * regular file. */
+ } while (ret > 0 && iocb->ki_left > 0 &&
+ (opcode == IOCB_CMD_PWRITEV ||
+ (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));

+ /* This means we must have transferred all that we could */
+ /* No need to retry anymore */
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;

@@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *i
return ret;
}

+static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb)
+{
+ ssize_t ret;
+
+ ret = rw_copy_check_uvector((struct iovec __user *)kiocb->ki_buf,
+ kiocb->ki_nbytes, 1,
+ &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+ if (ret < 0)
+ goto out;
+
+ kiocb->ki_nr_segs = kiocb->ki_nbytes;
+ kiocb->ki_cur_seg = 0;
+ /* ki_nbytes/left now reflect bytes instead of segs */
+ kiocb->ki_nbytes = ret;
+ kiocb->ki_left = ret;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+{
+ kiocb->ki_iovec = &kiocb->ki_inline_vec;
+ kiocb->ki_iovec->iov_base = kiocb->ki_buf;
+ kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_nr_segs = 1;
+ kiocb->ki_cur_seg = 0;
+ kiocb->ki_nbytes = kiocb->ki_left;
+ return 0;
+}
+
/*
* aio_setup_iocb:
* Performs the initial checks and aio retry method
@@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_READ);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_read)
- kiocb->ki_retry = aio_pread;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PWRITE:
ret = -EBADF;
@@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_WRITE);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_write)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PREADV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ break;
+ ret = security_file_permission(file, MAY_READ);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_read)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PWRITEV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ break;
+ ret = security_file_permission(file, MAY_WRITE);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_write)
- kiocb->ki_retry = aio_pwrite;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_FDSYNC:
ret = -EINVAL;
Index: linux-2.6.17-rc3.save/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/aio.h 2006-05-05 13:53:03.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/aio.h 2006-05-09 08:08:36.000000000 -0700
@@ -7,6 +7,7 @@
#include <linux/uio.h>

#include <asm/atomic.h>
+#include <linux/uio.h>

#define AIO_MAXSEGS 4
#define AIO_KIOGRP_NR_ATOMIC 8
@@ -114,6 +115,9 @@ struct kiocb {
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
struct iovec ki_inline_vec; /* inline vector */
+ struct iovec *ki_iovec;
+ unsigned long ki_nr_segs;
+ unsigned long ki_cur_seg;

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc3.save/include/linux/aio_abi.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/aio_abi.h 2006-05-05 13:53:03.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/aio_abi.h 2006-05-09 08:08:36.000000000 -0700
@@ -41,6 +41,8 @@ enum {
* IOCB_CMD_POLL = 5,
*/
IOCB_CMD_NOOP = 6,
+ IOCB_CMD_PREADV = 7,
+ IOCB_CMD_PWRITEV = 8,
};

/* read() from /dev/aio returns these structures. */
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-09 08:08:28.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-09 08:08:36.000000000 -0700
@@ -508,6 +508,74 @@ ssize_t do_loop_readv_writev(struct file
return ret;
}

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer)
+ {
+ unsigned long seg;
+ ssize_t ret;
+ struct iovec *iov = fast_pointer;
+
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
+ if (nr_segs == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
+ if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_segs > fast_segs) {
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * According to the Single Unix Specification we should return EINVAL
+ * if an element length is < 0 when cast to ssize_t or if the
+ * total length would overflow the ssize_t return value of the
+ * system call.
+ */
+ ret = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
+
+ /* see if we're about to use an invalid len or if
+ * it's about to overflow ssize_t */
+ if (len < 0 || (ret + len < ret)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret += len;
+ }
+out:
+ *ret_pointer = iov;
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -519,64 +587,20 @@ static ssize_t do_readv_writev(int type,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
ssize_t ret;
- int seg;
io_fn_t fn;
iov_fn_t fnv;

- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
- ret = 0;
- if (nr_segs == 0)
+ if (!file->f_op) {
+ ret = -EINVAL;
goto out;
-
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
- ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV)
- goto out;
- if (!file->f_op)
- goto out;
- if (nr_segs > UIO_FASTIOV) {
- ret = -ENOMEM;
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
- if (!iov)
- goto out;
}
- ret = -EFAULT;
- if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector)))
- goto out;

- /*
- * Single unix specification:
- * We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t. The total length is fitting an ssize_t
- *
- * Be careful here because iov_len is a size_t not an ssize_t
- */
- tot_len = 0;
- ret = -EINVAL;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
-
- if (len < 0) /* size_t not fitting an ssize_t .. */
- goto out;
- if (unlikely(!access_ok(vrfy_dir(type), buf, len)))
- goto Efault;
- tot_len += len;
- if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */
- goto out;
- }
- if (tot_len == 0) {
- ret = 0;
+ ret = rw_copy_check_uvector(uvector, nr_segs, ARRAY_SIZE(iovstack),
+ iovstack, &iov);
+ if (ret <= 0)
goto out;
- }

+ tot_len = ret;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
@@ -608,9 +632,6 @@ out:
fsnotify_modify(file->f_dentry);
}
return ret;
-Efault:
- ret = -EFAULT;
- goto out;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-09 08:08:28.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-09 08:08:36.000000000 -0700
@@ -1068,6 +1068,11 @@ struct inode_operations {

struct seq_file;

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer);
+
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,


2006-05-09 18:14:15

by Benjamin LaHaise

[permalink] [raw]
Subject: Re: [PATCH 0/3] VFS changes to collapse AIO and vectored IO into single (set of) fileops.

On Tue, May 09, 2006 at 11:03:45AM -0700, Badari Pulavarty wrote:
> single set of file-operation method using aio_read/aio_write.
> This work was originally suggested & started by Christoph Hellwig,
> when Zach Brown tried to add vectored support for AIO.
>
> Here is the summary:
>
> [PATCH 1/3] Vectorize aio_read/aio_write methods
>
> [PATCH 2/3] Remove readv/writev methods and use aio_read/aio_write
> instead.
>
> [PATCH 3/3] Zach's core aio changes to support vectored AIO.

They look pretty sane, and I agree they should go into -mm soon. Cheers,

-ben
--
"Time is of no importance, Mr. President, only life is important."
Don't Email: <[email protected]>.

2006-05-09 18:55:56

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 3/3] Zach's core aio changes to support vectored AIO

On Tue, May 09, 2006 at 11:08:39AM -0700, Badari Pulavarty wrote:
> This work is initially done by Zach Brown to add support for
> vectored aio. These are the core changes for AIO to support
> IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.
>
> I made few extra changes beyond Zach's work. They are
> - took out aio_pread/aio_pwrite and made them
> a special case into vectored support
> - added single inlined vector to save on kmalloc()
> for a simple aio_read/aio_write
> - kiocb->ki_left always indicates the amount of
> IO need to be done. Made sure that this gets
> set in sync case also, so that we don't need
> to loop over iovecs to figure out IO size all
> the time.
>
> Signed-off-by: Badari Pulavarty <[email protected]>
> Signed-off-by: Zach Brown <[email protected]>
> Acked-by: Benjamin LaHaise <[email protected]>

Please add my Signed-off-by somewhere, I did large portions of the
changes and ACK the final version too.

2006-05-09 18:58:33

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

Badari Pulavarty <[email protected]> wrote:
>
> static ssize_t ep_aio_read_retry(struct kiocb *iocb)
> {
> struct kiocb_priv *priv = iocb->private;
> - ssize_t status = priv->actual;
> + ssize_t len, total;
>
> /* we "retry" to get the right mm context for this: */
> - status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
> - if (unlikely(0 != status))
> - status = -EFAULT;
> - else
> - status = priv->actual;
> +
> + /* copy stuff into user buffers */
> + total = priv->actual;
> + len = 0;
> + for (i=0; i < priv->count; i++) {

for (i = 0

> + ssize_t this = min(priv->iv[i].iov_len, (size_t)total);

min_t().

Strange mixture of size_t and ssize_t there.

> + if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
> + break;
> +
> + total -= this;
> + len += this;
> + if (total <= 0)
> + break;
> + }
> +
> + if (unlikely(len != 0))
> + len = -EFAULT;

This looks wrong. I think you meant (total != 0).




Together these three patches shrink the kernel by 113 lines. I don't know
what the effect is on text size, but that's a pretty modest saving, at a
pretty high risk level.

What else do we get in return for this risk?

2006-05-09 19:03:21

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Tue, May 09, 2006 at 12:01:05PM -0700, Andrew Morton wrote:
> Together these three patches shrink the kernel by 113 lines. I don't know
> what the effect is on text size, but that's a pretty modest saving, at a
> pretty high risk level.
>
> What else do we get in return for this risk?

There's another patch on top, which I didn't bother to redo until this is
accepted, which kills a lot more code. After that, filesystems only have
to implement one method each for all kinds of read/write calls. This
allows us to make mm/filemap.c far less complex and actually
understandable, and likewise any filesystem that uses more complex
read/write variants than direct filemap.c calls. In addition to these
simplifications we also get a feature (async vectored I/O) for free.

2006-05-09 19:10:36

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

Christoph Hellwig <[email protected]> wrote:
>
> On Tue, May 09, 2006 at 12:01:05PM -0700, Andrew Morton wrote:
> > Together these three patches shrink the kernel by 113 lines. I don't know
> > what the effect is on text size, but that's a pretty modest saving, at a
> > pretty high risk level.
> >
> > What else do we get in return for this risk?
>
> there's another patch ontop which I didn't bother to redo until this is
> accepted which kills a lot more code. After that filesystems only have
> to implement one method each for all kinds of read/write calls. Which
> allows to both make the mm/filemap.c far less complex and actually
> understandable aswell as for any filesystem that uses more complex
> read/write variants than direct filemap.c calls. In addition to these
> simplification we also get a feature (async vectored I/O) for free.

Fair enough, thanks. Simplifying filemap.c would be a win.

I'll crunch on these three patches in the normal fashion. It'll be good if
we can get the followup patch done within the next week or two so we can
get it all tested at the same time. Although from your description it
doesn't sound like it'll be completely trivial...

2006-05-09 19:21:04

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Tue, May 09, 2006 at 12:13:05PM -0700, Andrew Morton wrote:
> > there's another patch ontop which I didn't bother to redo until this is
> > accepted which kills a lot more code. After that filesystems only have
> > to implement one method each for all kinds of read/write calls. Which
> > allows to both make the mm/filemap.c far less complex and actually
> > understandable aswell as for any filesystem that uses more complex
> > read/write variants than direct filemap.c calls. In addition to these
> > simplification we also get a feature (async vectored I/O) for free.
>
> Fair enough, thanks. Simplifying filemap.c would be a win.
>
> I'll crunch on these three patches in the normal fashion. It'll be good if
> we can get the followup patch done within the next week or two so we can
> get it all tested at the same time. Although from your description it
> doesn't sound like it'll be completely trivial...

That patch is lots of trivial and boring work. If anyone wants to beat
me to it:

- in any filesystem that implements the generic_file_aio_{read,write}
directly, remove these and apply this patch to the file_operations
vectors:


- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,

Note that this does _not_ cause additional indirection for normal
sys_read/sys_write calls because they call .aio_read/.aio_write
directly. It's only needed because we have various places in the
tree that like to call .read/.write directly

- in the filesystems that implement more or less trivial wrappers
around generic_file_read/generic_file_write, convert them to the
aio_read/aio_write prototypes so they can set .read/.write as above

- after that generic_file_read/generic_file_write/generic_file_read/
generic_file_write_nolock should have no callers left and the code
for read/write in mm/filemap.c can be collapsed into very few functions.
What's left should be something like:

- generic_file_aio_read
(__generic_file_aio_read and generic_file_aio_read merged into one)
- __generic_file_aio_write
(basically the current __generic_file_aio_write_nolock)
- generic_file_aio_write_nolock
- generic_file_aio_write
(small wrappers around __generic_file_aio_write)

2006-05-09 20:05:22

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 3/3] Zach's core aio changes to support vectored AIO



christoph wrote:

>On Tue, May 09, 2006 at 11:08:39AM -0700, Badari Pulavarty wrote:
>
>>This work is initially done by Zach Brown to add support for
>>vectored aio. These are the core changes for AIO to support
>>IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.
>>
>>I made few extra changes beyond Zach's work. They are
>> - took out aio_pread/aio_pwrite and made them
>> a special case into vectored support
>> - added single inlined vector to save on kmalloc()
>> for a simple aio_read/aio_write
>> - kiocb->ki_left always indicates the amount of
>> IO need to be done. Made sure that this gets
>> set in sync case also, so that we don't need
>> to loop over iovecs to figure out IO size all
>> the time.
>>
>>Signed-off-by: Badari Pulavarty <[email protected]>
>>Signed-off-by: Zach Brown <[email protected]>
>>Acked-by: Benjamin LaHaise <[email protected]>
>>
>
>Please add my Signed-off-by somewhere, I did large portions of the
>changes and ACK the final version too.
>
Signed-off-by: Christoph Hellwig <[email protected]>

Done.. !! Sorry, Definitely not intentional :(

Thanks,
Badari



2006-05-09 20:08:04

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods



Christoph Hellwig wrote:

>On Tue, May 09, 2006 at 12:01:05PM -0700, Andrew Morton wrote:
>
>>Together these three patches shrink the kernel by 113 lines. I don't know
>>what the effect is on text size, but that's a pretty modest saving, at a
>>pretty high risk level.
>>
>>What else do we get in return for this risk?
>>
>
>there's another patch ontop which I didn't bother to redo until this is
>accepted which kills a lot more code. After that filesystems only have
>to implement one method each for all kinds of read/write calls. Which
>allows to both make the mm/filemap.c far less complex and actually
>understandable aswell as for any filesystem that uses more complex
>read/write variants than direct filemap.c calls. In addition to these
>simplification we also get a feature (async vectored I/O) for free.
>
Yep. I am currently killing read/write methods for all filesystems and also
getting rid of generic_file_read() and generic_file_write().

Thanks,
Badari


2006-05-09 23:52:17

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Tue, 2006-05-09 at 12:01 -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > static ssize_t ep_aio_read_retry(struct kiocb *iocb)
> > {
> > struct kiocb_priv *priv = iocb->private;
> > - ssize_t status = priv->actual;
> > + ssize_t len, total;
> >
> > /* we "retry" to get the right mm context for this: */
> > - status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
> > - if (unlikely(0 != status))
> > - status = -EFAULT;
> > - else
> > - status = priv->actual;
> > +
> > + /* copy stuff into user buffers */
> > + total = priv->actual;
> > + len = 0;
> > + for (i=0; i < priv->count; i++) {
>
> for (i = 0
>
> > + ssize_t this = min(priv->iv[i].iov_len, (size_t)total);
>
> min_t().
>
> Strange mixture of size_t and ssize_t there.

Borrowed it from somewhere :(
I will clean it up.

>
> > + if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
> > + break;
> > +
> > + total -= this;
> > + len += this;
> > + if (total <= 0)
> > + break;
> > + }
> > +
> > + if (unlikely(len != 0))
> > + len = -EFAULT;
>
> This looks wrong. I think you meant (total != 0).

Yes. It should be "total".

Thanks,
Badari



2006-05-09 23:56:34

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Tue, 2006-05-09 at 21:20 +0200, Christoph Hellwig wrote:
> On Tue, May 09, 2006 at 12:13:05PM -0700, Andrew Morton wrote:
> > > there's another patch ontop which I didn't bother to redo until this is
> > > accepted which kills a lot more code. After that filesystems only have
> > > to implement one method each for all kinds of read/write calls. Which
> > > allows to both make the mm/filemap.c far less complex and actually
> > > understandable aswell as for any filesystem that uses more complex
> > > read/write variants than direct filemap.c calls. In addition to these
> > > simplification we also get a feature (async vectored I/O) for free.
> >
> > Fair enough, thanks. Simplifying filemap.c would be a win.
> >
> > I'll crunch on these three patches in the normal fashion. It'll be good if
> > we can get the followup patch done within the next week or two so we can
> > get it all tested at the same time. Although from your description it
> > doesn't sound like it'll be completely trivial...
>
> That patch is lots of tirival and boring work. If anyone wants to beat
> me to it:

Well, I am not sure if you mean *exactly* this..

So far, I have this. I really don't like the idea of
adding .aio_read/.aio_write methods for the filesystems that currently
don't have them (so we can force their .read/.write to do_sync_*()).
Is there a way to fix callers of .read/.write() methods to use
something like do_sync_read/write - that way we can take out
.read/.write completely ?

Anyway, here it is, compiled but untested. I think I can clean up
more in filemap.c (after reading through your suggestions). Please
let me know if I am on the wrong path ...

Thanks,
Badari

Patch to remove generic_file_read() and generic_file_write()
as we seem to have too many interfaces.

Make .read/.write methods for filesystems to use do_sync_read()
and do_sync_write() which makes use of aio_read/aio_write().

I really don't like keeping .read()/.write() methods since
sys_read/sys_write() can make use of async methods - but
this is for those who call .read/.write() directly.

drivers/char/raw.c | 4 +--
fs/adfs/file.c | 6 +++--
fs/affs/file.c | 6 +++--
fs/bfs/file.c | 6 +++--
fs/block_dev.c | 2 -
fs/ext2/file.c | 4 +--
fs/fuse/file.c | 6 +++--
fs/hfs/inode.c | 6 +++--
fs/hfsplus/inode.c | 6 +++--
fs/hostfs/hostfs_kern.c | 4 +--
fs/hpfs/file.c | 6 +++--
fs/jffs/inode-v23.c | 6 +++--
fs/jffs2/file.c | 6 +++--
fs/jfs/file.c | 4 +--
fs/minix/file.c | 6 +++--
fs/ntfs/file.c | 2 -
fs/qnx4/file.c | 6 +++--
fs/ramfs/file-mmu.c | 6 +++--
fs/ramfs/file-nommu.c | 6 +++--
fs/read_write.c | 3 +-
include/linux/fs.h | 2 -
mm/filemap.c | 55 ------------------------------------------------
22 files changed, 64 insertions(+), 94 deletions(-)

Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-09 14:15:28.000000000 -0700
@@ -251,9 +251,9 @@ static ssize_t raw_file_write(struct fil
}

static struct file_operations raw_fops = {
- .read = generic_file_read,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
- .write = raw_file_write,
+ .write = do_sync_write,
.aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
Index: linux-2.6.17-rc3.save/fs/adfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/adfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/adfs/file.c 2006-05-09 14:31:50.000000000 -0700
@@ -27,10 +27,12 @@

const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.fsync = file_fsync,
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc3.save/fs/affs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/affs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/affs/file.c 2006-05-09 14:35:22.000000000 -0700
@@ -27,8 +27,10 @@ static int affs_file_release(struct inod

const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = affs_file_open,
.release = affs_file_release,
Index: linux-2.6.17-rc3.save/fs/bfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/bfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/bfs/file.c 2006-05-09 14:36:49.000000000 -0700
@@ -19,8 +19,10 @@

const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-09 14:39:54.000000000 -0700
@@ -1083,7 +1083,7 @@ const struct file_operations def_blk_fop
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write_nolock,
Index: linux-2.6.17-rc3.save/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext2/file.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext2/file.c 2006-05-09 14:41:14.000000000 -0700
@@ -41,8 +41,8 @@ static int ext2_release_file (struct ino
*/
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.ioctl = ext2_ioctl,
Index: linux-2.6.17-rc3.save/fs/fuse/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fuse/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fuse/file.c 2006-05-09 14:44:43.000000000 -0700
@@ -621,8 +621,10 @@ static int fuse_set_page_dirty(struct pa

static const struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
Index: linux-2.6.17-rc3.save/fs/hfs/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hfs/inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hfs/inode.c 2006-05-09 14:46:37.000000000 -0700
@@ -603,8 +603,10 @@ int hfs_inode_setattr(struct dentry *den

static const struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc3.save/fs/hfsplus/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hfsplus/inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hfsplus/inode.c 2006-05-09 15:05:44.000000000 -0700
@@ -282,8 +282,10 @@ static struct inode_operations hfsplus_f

static const struct file_operations hfsplus_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hostfs/hostfs_kern.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c 2006-05-09 15:06:37.000000000 -0700
@@ -386,11 +386,11 @@ int hostfs_fsync(struct file *file, stru

static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .write = generic_file_write,
+ .write = do_sync_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
.release = NULL,
Index: linux-2.6.17-rc3.save/fs/hpfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hpfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hpfs/file.c 2006-05-09 15:08:53.000000000 -0700
@@ -113,7 +113,7 @@ static ssize_t hpfs_file_write(struct fi
{
ssize_t retval;

- retval = generic_file_write(file, buf, count, ppos);
+ retval = do_sync_write(file, buf, count, ppos);
if (retval > 0)
hpfs_i(file->f_dentry->d_inode)->i_dirty = 1;
return retval;
@@ -122,8 +122,10 @@ static ssize_t hpfs_file_write(struct fi
const struct file_operations hpfs_file_ops =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.write = hpfs_file_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
Index: linux-2.6.17-rc3.save/fs/jffs/inode-v23.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jffs/inode-v23.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jffs/inode-v23.c 2006-05-09 15:10:34.000000000 -0700
@@ -1633,8 +1633,10 @@ static const struct file_operations jffs
{
.open = generic_file_open,
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs_fsync,
Index: linux-2.6.17-rc3.save/fs/jffs2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jffs2/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jffs2/file.c 2006-05-09 15:11:58.000000000 -0700
@@ -42,8 +42,10 @@ const struct file_operations jffs2_file_
{
.llseek = generic_file_llseek,
.open = generic_file_open,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
Index: linux-2.6.17-rc3.save/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jfs/file.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jfs/file.c 2006-05-09 15:12:41.000000000 -0700
@@ -103,8 +103,8 @@ struct inode_operations jfs_file_inode_o
const struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
- .write = generic_file_write,
- .read = generic_file_read,
+ .write = do_sync_write,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/minix/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/minix/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/minix/file.c 2006-05-09 15:15:06.000000000 -0700
@@ -17,8 +17,10 @@ int minix_sync_file(struct file *, struc

const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = minix_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-09 15:50:43.000000000 -0700
@@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *

const struct file_operations ntfs_file_ops = {
.llseek = generic_file_llseek, /* Seek inside file. */
- .read = generic_file_read, /* Read from file. */
+ .read = do_sync_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
Index: linux-2.6.17-rc3.save/fs/qnx4/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/qnx4/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/qnx4/file.c 2006-05-09 15:18:10.000000000 -0700
@@ -22,11 +22,13 @@
const struct file_operations qnx4_file_operations =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
#ifdef CONFIG_QNX4FS_RW
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = qnx4_sync_file,
#endif
};
Index: linux-2.6.17-rc3.save/fs/ramfs/file-mmu.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ramfs/file-mmu.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ramfs/file-mmu.c 2006-05-09 15:19:34.000000000 -0700
@@ -33,8 +33,10 @@ struct address_space_operations ramfs_ao
};

const struct file_operations ramfs_file_operations = {
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc3.save/fs/ramfs/file-nommu.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ramfs/file-nommu.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ramfs/file-nommu.c 2006-05-09 15:20:37.000000000 -0700
@@ -36,8 +36,10 @@ struct address_space_operations ramfs_ao
const struct file_operations ramfs_file_operations = {
.mmap = ramfs_nommu_mmap,
.get_unmapped_area = ramfs_nommu_get_unmapped_area,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
.llseek = generic_file_llseek,
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-09 14:11:53.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-09 15:21:53.000000000 -0700
@@ -22,7 +22,8 @@

const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-09 14:11:53.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-09 15:41:52.000000000 -0700
@@ -1594,9 +1594,7 @@ extern int generic_file_mmap(struct file
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
-extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-05-09 14:11:51.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-09 15:41:20.000000000 -0700
@@ -1104,22 +1104,6 @@ generic_file_aio_read(struct kiocb *iocb
}
EXPORT_SYMBOL(generic_file_aio_read);

-ssize_t
-generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-EXPORT_SYMBOL(generic_file_read);
-
int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
ssize_t written;
@@ -2185,21 +2169,6 @@ ssize_t generic_file_aio_write_nolock(st
}
EXPORT_SYMBOL(generic_file_aio_write_nolock);

-static ssize_t
-__generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
ssize_t
generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
@@ -2242,30 +2211,6 @@ ssize_t generic_file_aio_write(struct ki
}
EXPORT_SYMBOL(generic_file_aio_write);

-ssize_t generic_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- ssize_t err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.




2006-05-10 08:01:05

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Tue, May 09, 2006 at 04:57:42PM -0700, Badari Pulavarty wrote:
> > > we can get the followup patch done within the next week or two so we can
> > > get it all tested at the same time. Although from your description it
> > > doesn't sound like it'll be completely trivial...
> >
> > That patch is lots of trivial and boring work. If anyone wants to beat
> > me to it:
>
> Well, I am not sure if you mean *exactly* this..
>
> So far, I have this. I really don't like the idea of
> adding .aio_read/.aio_write methods for the filesystems who currently
> don't have one (so we can force their .read/.write to do_sync_*()).

Why don't you like this idea? It helps to sort out callers into two
categories. The following is a something I wrote up to put into
Documentation/ somewhere once these patches are in.


-------- snip --------

There are two ways to implement read/write for filesystems and drivers:

The simple way is to implement the read and write methods. Normal
synchronous, single buffer requests are handed directly to the driver in
this case. Vectored requests are emulated using a loop in the higher
level code. AIO requests are silently performed synchronously.
This method is normally used for character drivers and synthetic
filesystems.

The advanced method is to implement the aio_read and aio_write methods.
These allow the request to be done asynchronously and submit multiple
IO vectors in parallel. A page cache based filesystem gets this
functionality for free by using the routines from filemap.c - in fact
there is no easy way to use the generic page cache code without
implementing aio_read and aio_write. The other big user of this
interface is sockets. Very few character drivers need this complexity.

-------- snip --------

> Is there a way to fix callers of .read/.write() methods to use
> something like do_sync_read/write - that way we can take out
> .read/.write completely ?

The only way to fix this is to add some kernel_read/kernel_write helpers
that factor the "use aio_read / aio_write if present and wait for
I/O completion" logic out of vfs_read/vfs_write. I started on that but it
got very messy.

> Anyway, here it is compiled but untested.. I think I can clean up
> more in filemap.c (after reading through your suggestions). Please
> let me know, if I am on wrong path ...

Currently I don't have time to actually apply the patchkit and look at
the result, so I'll defer further comments. Besides maybe not doing all
possible cleanups (e.g. I still see generic_file_write_nolock) this
patch looks very good.

2006-05-10 15:00:54

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

On Wed, 2006-05-10 at 10:00 +0200, Christoph Hellwig wrote:
> On Tue, May 09, 2006 at 04:57:42PM -0700, Badari Pulavarty wrote:
> > > > we can get the followup patch done within the next week or two so we can
> > > > get it all tested at the same time. Although from your description it
> > > > doesn't sound like it'll be completely trivial...
> > >
> > > That patch is lots of trivial and boring work. If anyone wants to beat
> > > me to it:
> >
> > Well, I am not sure if you mean *exactly* this..
> >
> > So far, I have this. I really don't like the idea of
> > adding .aio_read/.aio_write methods for the filesystems who currently
> > don't have one (so we can force their .read/.write to do_sync_*()).
>
> Why don't you like this idea?

Few reasons:

1) I added .aio_read/.aio_write methods for all the filesystems that
are not currently having this, just to make their .read/.write to
do_sync_*().

2) It's just not possible for filesystems to provide ONLY the
.aio_read/.aio_write() interfaces. They have to have .read/.write()
also to handle direct callers :(

3) sys_read/sys_write() will now have an extra indirection:

sys_read() -> vfs_read() -> do_sync_read() -> .aio_read()

whereas the current code is..

sys_read() -> vfs_read() -> .read()

We now have the extra do_sync_read() call, but that may be okay.


>
> -------- snip --------
>
> There are two ways to implement read/write for filesystems and drivers:
>
> The simple way is to implement the read and write methods. Normal
> synchronous, single buffer requests are handed directly to the driver in
> this case. Vectored requests are emulated using a loop in the higher
> level code. AIO requests are silently performed synchronously.
> This method is normally used for character drivers and synthetic
> filesystems.
>
> The advanced method is to implement the aio_read and aio_write methods.
> These allow the request to be done asynchronously and submit multiple
> IO vectors in parallel. A page cache based filesystem gets this
> functionality for free by using the routines from filemap.c - in fact
> there is no easy way to use the generic page cache code without
> implementing aio_read and aio_write. The other big user of this
> interface is sockets. Very few character drivers need this complexity.
>
> -------- snip --------

> > Is there a way to fix callers of .read/.write() methods to use
> > something like do_sync_read/write - that way we can take out
> > .read/.write completely ?
>
> The only way to fix this is to add some kernel_read/kernel_write helpers
> that factor the "use aio_read / aio_write if present and wait for
> I/O completion" logic out of vfs_read/vfs_write. I started on that but it
> got very messy.

Okay. I will take your word for it - I won't bother trying for now :)
>
> > Anyway, here it is compiled but untested.. I think I can clean up
> > more in filemap.c (after reading through your suggestions). Please
> > let me know, if I am on wrong path ...
>
> Currently I don't have time to actually apply the patchkit and look at
> the result, so I'll defer further comments. Besides maybe not doing all
> possible cleanups (e.g. I still see generic_file_write_nolock) this
> patch looks very good.

I need to take a closer look at generic_file_write_nolock() since I
couldn't eliminate it easily in my first dumb pass. I will also look
at cleanups you suggested. Thanks.

Thanks,
Badari

2006-05-10 16:00:33

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods


I am starting to like this :)

Here is what I have so far (this patch applies on top of the other set).

We will NOW have only the following:

generic_file_aio_read() - read handler

generic_file_aio_write() - write handler
generic_file_aio_write_nolock() - no lock write handler
__generic_file_aio_write_nolock() - internal worker routine
(not exported)



Thanks,
Badari

Get rid of everything other than the following generic read/write
interfaces:

generic_file_aio_read() - read handler

generic_file_aio_write() - write handler
generic_file_aio_write_nolock() - no lock write handler

__generic_file_aio_write_nolock() - internal worker routine
(not exported)

Signed-off-by: Badari Pulavarty <[email protected]>

drivers/char/raw.c | 15 +------
fs/adfs/file.c | 6 ++-
fs/affs/file.c | 6 ++-
fs/bfs/file.c | 6 ++-
fs/block_dev.c | 12 +-----
fs/ext2/file.c | 4 +-
fs/fuse/file.c | 6 ++-
fs/hfs/inode.c | 6 ++-
fs/hfsplus/inode.c | 6 ++-
fs/hostfs/hostfs_kern.c | 4 +-
fs/hpfs/file.c | 6 ++-
fs/jffs/inode-v23.c | 6 ++-
fs/jffs2/file.c | 6 ++-
fs/jfs/file.c | 4 +-
fs/minix/file.c | 6 ++-
fs/ntfs/file.c | 2 -
fs/qnx4/file.c | 6 ++-
fs/ramfs/file-mmu.c | 6 ++-
fs/ramfs/file-nommu.c | 6 ++-
fs/read_write.c | 3 +
fs/xfs/linux-2.6/xfs_lrw.c | 4 +-
include/linux/fs.h | 5 --
mm/filemap.c | 88 ++-------------------------------------------
23 files changed, 72 insertions(+), 147 deletions(-)

Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-10 08:29:35.000000000 -0700
@@ -239,21 +239,10 @@ out:
return err;
}

-static ssize_t raw_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_write_nolock(file, &local_iov, 1, ppos);
-}
-
static struct file_operations raw_fops = {
- .read = generic_file_read,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
- .write = raw_file_write,
+ .write = do_sync_write,
.aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
Index: linux-2.6.17-rc3.save/fs/adfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/adfs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/adfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -27,10 +27,12 @@

const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.fsync = file_fsync,
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc3.save/fs/affs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/affs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/affs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -27,8 +27,10 @@ static int affs_file_release(struct inod

const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = affs_file_open,
.release = affs_file_release,
Index: linux-2.6.17-rc3.save/fs/bfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/bfs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/bfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -19,8 +19,10 @@

const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-10 08:29:35.000000000 -0700
@@ -1056,14 +1056,6 @@ static int blkdev_close(struct inode * i
return blkdev_put(bdev);
}

-static ssize_t blkdev_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_write_nolock(file, &local_iov, 1, ppos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1083,8 +1075,8 @@ const struct file_operations def_blk_fop
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = generic_file_read,
- .write = blkdev_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext2/file.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext2/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -41,8 +41,8 @@ static int ext2_release_file (struct ino
*/
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.ioctl = ext2_ioctl,
Index: linux-2.6.17-rc3.save/fs/fuse/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fuse/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fuse/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -621,8 +621,10 @@ static int fuse_set_page_dirty(struct pa

static const struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
Index: linux-2.6.17-rc3.save/fs/hfs/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hfs/inode.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hfs/inode.c 2006-05-10 08:29:35.000000000 -0700
@@ -603,8 +603,10 @@ int hfs_inode_setattr(struct dentry *den

static const struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc3.save/fs/hfsplus/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hfsplus/inode.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hfsplus/inode.c 2006-05-10 08:29:35.000000000 -0700
@@ -282,8 +282,10 @@ static struct inode_operations hfsplus_f

static const struct file_operations hfsplus_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hostfs/hostfs_kern.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c 2006-05-10 08:29:35.000000000 -0700
@@ -386,11 +386,11 @@ int hostfs_fsync(struct file *file, stru

static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .write = generic_file_write,
+ .write = do_sync_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
.release = NULL,
Index: linux-2.6.17-rc3.save/fs/hpfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hpfs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hpfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -113,7 +113,7 @@ static ssize_t hpfs_file_write(struct fi
{
ssize_t retval;

- retval = generic_file_write(file, buf, count, ppos);
+ retval = do_sync_write(file, buf, count, ppos);
if (retval > 0)
hpfs_i(file->f_dentry->d_inode)->i_dirty = 1;
return retval;
@@ -122,8 +122,10 @@ static ssize_t hpfs_file_write(struct fi
const struct file_operations hpfs_file_ops =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.write = hpfs_file_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
Index: linux-2.6.17-rc3.save/fs/jffs/inode-v23.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jffs/inode-v23.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jffs/inode-v23.c 2006-05-10 08:29:35.000000000 -0700
@@ -1633,8 +1633,10 @@ static const struct file_operations jffs
{
.open = generic_file_open,
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs_fsync,
Index: linux-2.6.17-rc3.save/fs/jffs2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jffs2/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jffs2/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -42,8 +42,10 @@ const struct file_operations jffs2_file_
{
.llseek = generic_file_llseek,
.open = generic_file_open,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
Index: linux-2.6.17-rc3.save/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jfs/file.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -103,8 +103,8 @@ struct inode_operations jfs_file_inode_o
const struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
- .write = generic_file_write,
- .read = generic_file_read,
+ .write = do_sync_write,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/minix/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/minix/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/minix/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -17,8 +17,10 @@ int minix_sync_file(struct file *, struc

const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = minix_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *

const struct file_operations ntfs_file_ops = {
.llseek = generic_file_llseek, /* Seek inside file. */
- .read = generic_file_read, /* Read from file. */
+ .read = do_sync_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
Index: linux-2.6.17-rc3.save/fs/qnx4/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/qnx4/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/qnx4/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -22,11 +22,13 @@
const struct file_operations qnx4_file_operations =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
#ifdef CONFIG_QNX4FS_RW
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = qnx4_sync_file,
#endif
};
Index: linux-2.6.17-rc3.save/fs/ramfs/file-mmu.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ramfs/file-mmu.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ramfs/file-mmu.c 2006-05-10 08:29:35.000000000 -0700
@@ -33,8 +33,10 @@ struct address_space_operations ramfs_ao
};

const struct file_operations ramfs_file_operations = {
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc3.save/fs/ramfs/file-nommu.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ramfs/file-nommu.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ramfs/file-nommu.c 2006-05-10 08:29:35.000000000 -0700
@@ -36,8 +36,10 @@ struct address_space_operations ramfs_ao
const struct file_operations ramfs_file_operations = {
.mmap = ramfs_nommu_mmap,
.get_unmapped_area = ramfs_nommu_get_unmapped_area,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
.llseek = generic_file_llseek,
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-10 08:29:26.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-10 08:29:35.000000000 -0700
@@ -22,7 +22,8 @@

const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-10 08:29:26.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-10 09:00:37.000000000 -0700
@@ -1594,11 +1594,8 @@ extern int generic_file_mmap(struct file
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
-extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
@@ -1608,8 +1605,6 @@ extern ssize_t generic_file_buffered_wri
unsigned long, loff_t, loff_t *, size_t, ssize_t);
extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-10 08:44:01.000000000 -0700
@@ -1018,13 +1018,14 @@ success:
* that can use the page cache directly.
*/
ssize_t
-__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg;
size_t count;
+ loff_t *ppos = &iocb->ki_pos;

count = 0;
for (seg = 0; seg < nr_segs; seg++) {
@@ -1048,7 +1049,7 @@ __generic_file_aio_read(struct kiocb *io

/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (filp->f_flags & O_DIRECT) {
- loff_t pos = *ppos, size;
+ loff_t size;
struct address_space *mapping;
struct inode *inode;

@@ -1093,33 +1094,8 @@ out:
return retval;
}

-EXPORT_SYMBOL(__generic_file_aio_read);
-
-ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
-}
EXPORT_SYMBOL(generic_file_aio_read);

-ssize_t
-generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-EXPORT_SYMBOL(generic_file_read);
-
int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
ssize_t written;
@@ -2185,38 +2161,6 @@ ssize_t generic_file_aio_write_nolock(st
}
EXPORT_SYMBOL(generic_file_aio_write_nolock);

-static ssize_t
-__generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-ssize_t
-generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write_nolock);
-
ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -2242,30 +2186,6 @@ ssize_t generic_file_aio_write(struct ki
}
EXPORT_SYMBOL(generic_file_aio_write);

-ssize_t generic_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- ssize_t err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_lrw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/xfs/linux-2.6/xfs_lrw.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_lrw.c 2006-05-10 08:45:52.000000000 -0700
@@ -276,7 +276,9 @@ xfs_read(

xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags);
- ret = __generic_file_aio_read(iocb, iovp, segs, offset);
+
+ iocb->ki_pos = *offset;
+ ret = generic_file_aio_read(iocb, iovp, segs, *offset);
if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
ret = wait_on_sync_kiocb(iocb);
if (ret > 0)



2006-05-10 20:50:51

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/3] Vectorize aio_read/aio_write methods

Andrew,

If you haven't picked these patches into -mm yet, can you hold off until
tomorrow? I have an updated version with a few minor fixes, and I am almost
ready with the filemap.c cleanups. I am currently testing those and haven't
found any blockers.

Thanks,
Badari


2006-05-11 15:37:03

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 0/4] VFS fileop cleanups by collapsing AIO and vector IO

Hi,

This series of patches cleans up and streamlines the generic_file_*
interfaces in filemap.c.

The first three patches collapse all the vectored IO support into a
single set of file-operation methods using aio_read/aio_write.
This work was originally suggested and started by Christoph Hellwig,
when Zach Brown tried to add vectored support for AIO.

Patch 4 sets every filesystem's .read/.write/.aio_read/.aio_write
methods correctly, which allows us to clean up most of the
generic_file_*_read/write interfaces in filemap.c.

After this patch set, we should end up with ONLY the following
exported read/write interfaces in filemap.c:

generic_file_aio_read() - read handler
generic_file_aio_write() - write handler
generic_file_aio_write_nolock() - no lock write handler

Here is the summary:

[PATCH 1/4] Vectorize aio_read/aio_write methods

[PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write
instead.

[PATCH 3/4] Core aio changes to support vectored AIO.

[PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

BTW, Chuck Lever is actually re-arranging the NFS DIO and AIO code to
fit into this model.

Thanks to Chuck Lever and Shaggy for tracking down the latest
set of issues. Big thanks to Christoph Hellwig for all his ideas
and suggestions.

I ran various tests, including LTP, on this series. Andrew,
can you include these in the -mm tree?

Thanks,
Badari


2006-05-11 15:37:50

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 1/4] Vectorize aio_read/aio_write methods

This patch vectorizes the aio_read() and aio_write() methods to prepare
for collapsing all aio and vectored operations into one interface:
aio_read()/aio_write().


Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Badari Pulavarty <[email protected]>

Documentation/filesystems/Locking | 5 +-
Documentation/filesystems/vfs.txt | 4 +-
drivers/char/raw.c | 14 -------
drivers/usb/gadget/inode.c | 71 +++++++++++++++++++++++++++-----------
fs/aio.c | 15 +++++---
fs/block_dev.c | 10 -----
fs/cifs/cifsfs.c | 6 +--
fs/ext3/file.c | 5 +-
fs/nfs/direct.c | 24 +++++++++---
fs/nfs/file.c | 43 ++++++++++++-----------
fs/ntfs/file.c | 8 +---
fs/ocfs2/file.c | 28 ++++++--------
fs/read_write.c | 20 ++++++++--
fs/reiserfs/file.c | 12 +-----
fs/xfs/linux-2.6/xfs_file.c | 44 +++++++++++------------
include/linux/aio.h | 2 +
include/linux/fs.h | 10 ++---
include/linux/nfs_fs.h | 8 ++--
include/net/sock.h | 1
mm/filemap.c | 38 +++++++++-----------
net/socket.c | 48 ++++++++++++-------------
21 files changed, 226 insertions(+), 190 deletions(-)

Index: linux-2.6.17-rc3.save/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.17-rc3.save.orig/Documentation/filesystems/Locking 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/Documentation/filesystems/Locking 2006-05-02 07:53:58.000000000 -0700
@@ -355,10 +355,9 @@ The last two are called only from check_
prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
- loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int,
Index: linux-2.6.17-rc3.save/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.17-rc3.save.orig/Documentation/filesystems/vfs.txt 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/Documentation/filesystems/vfs.txt 2006-05-02 07:53:58.000000000 -0700
@@ -699,9 +699,9 @@ This describes how the VFS can manipulat
struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-11 08:24:23.327478280 -0700
@@ -250,23 +250,11 @@ static ssize_t raw_file_write(struct fil
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
-
static struct file_operations raw_fops = {
.read = generic_file_read,
.aio_read = generic_file_aio_read,
.write = raw_file_write,
- .aio_write = raw_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
Index: linux-2.6.17-rc3.save/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/aio.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/aio.c 2006-05-11 08:23:43.842480912 -0700
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/uio.h>

#define DEBUG 0

@@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *i
ssize_t ret = 0;

do {
- ret = file->f_op->aio_read(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
/*
* Can't just depend on iocb->ki_left to determine
* whether we are done. This may have been a short read.
@@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *
ssize_t ret = 0;

do {
- ret = file->f_op->aio_write(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-11 08:24:23.323478888 -0700
@@ -1064,14 +1064,6 @@ static ssize_t blkdev_file_write(struct
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1094,7 +1086,7 @@ const struct file_operations def_blk_fop
.read = generic_file_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
- .aio_write = blkdev_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
Index: linux-2.6.17-rc3.save/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/cifs/cifsfs.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/cifs/cifsfs.c 2006-05-11 08:24:23.322479040 -0700
@@ -496,13 +496,13 @@ static ssize_t cifs_file_writev(struct f
return written;
}

-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
ssize_t written;

- written = generic_file_aio_write(iocb, buf, count, pos);
+ written = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (!CIFS_I(inode)->clientCanCacheAll)
filemap_fdatawrite(inode->i_mapping);
return written;
Index: linux-2.6.17-rc3.save/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext3/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext3/file.c 2006-05-11 08:24:23.322479040 -0700
@@ -48,14 +48,15 @@ static int ext3_release_file (struct ino
}

static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
int err;

- ret = generic_file_aio_write(iocb, buf, count, pos);
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

/*
* Skip flushing if there was an error, or if nothing was written.
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-11 08:24:23.325478584 -0700
@@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(str

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
@@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read);

ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
Index: linux-2.6.17-rc3.save/fs/reiserfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/reiserfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/reiserfs/file.c 2006-05-09 15:31:55.000000000 -0700
@@ -1329,7 +1329,7 @@ static ssize_t reiserfs_file_write(struc
if (err)
return err;
}
- result = generic_file_write(file, buf, count, ppos);
+ result = do_sync_write(file, buf, count, ppos);

if (after_file_end) { /* Now update i_size and remove the savelink */
struct reiserfs_transaction_handle th;
@@ -1560,14 +1560,8 @@ static ssize_t reiserfs_file_write(struc
return res;
}

-static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
- size_t count, loff_t pos)
-{
- return generic_file_aio_write(iocb, buf, count, pos);
-}
-
const struct file_operations reiserfs_file_operations = {
- .read = generic_file_read,
+ .read = do_sync_read,
.write = reiserfs_file_write,
.ioctl = reiserfs_ioctl,
.mmap = generic_file_mmap,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_fi
.fsync = reiserfs_sync_file,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
- .aio_write = reiserfs_aio_write,
+ .aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
Index: linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/xfs/linux-2.6/xfs_file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c 2006-05-11 08:24:23.321479192 -0700
@@ -51,12 +51,11 @@ static struct vm_operations_struct xfs_d
STATIC inline ssize_t
__xfs_file_read(
struct kiocb *iocb,
- char __user *buf,
+ const struct iovec *iov,
+ unsigned long nr_segs,
int ioflags,
- size_t count,
loff_t pos)
{
- struct iovec iov = {buf, count};
struct file *file = iocb->ki_filp;
vnode_t *vp = vn_from_inode(file->f_dentry->d_inode);
ssize_t rval;
@@ -65,39 +64,38 @@ __xfs_file_read(

if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
- VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_READ(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_read_invis(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
__xfs_file_write(
- struct kiocb *iocb,
- const char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
+ struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ int ioflags,
+ loff_t pos)
{
- struct iovec iov = {(void __user *)buf, count};
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
vnode_t *vp = vn_from_inode(inode);
@@ -107,28 +105,28 @@ __xfs_file_write(
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;

- VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_WRITE(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_write_invis(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-11 08:24:23.328478128 -0700
@@ -1015,9 +1015,9 @@ struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1594,11 +1594,11 @@ extern int file_send_actor(read_descript
extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
- unsigned long, loff_t *);
+ unsigned long, loff_t);
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
Index: linux-2.6.17-rc3.save/include/net/sock.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/net/sock.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/net/sock.h 2006-05-02 07:53:58.000000000 -0700
@@ -659,7 +659,6 @@ struct sock_iocb {
struct sock *sk;
struct scm_cookie *scm;
struct msghdr *msg, async_msg;
- struct iovec async_iov;
struct kiocb *kiocb;
};

Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-11 08:24:23.326478432 -0700
@@ -1096,14 +1096,12 @@ out:
EXPORT_SYMBOL(__generic_file_aio_read);

ssize_t
-generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+ return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
}
-
EXPORT_SYMBOL(generic_file_aio_read);

ssize_t
@@ -2163,22 +2161,21 @@ out:
current->backing_dev_info = NULL;
return written ? written : err;
}
-EXPORT_SYMBOL(generic_file_aio_write_nolock);

-ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- loff_t pos = *ppos;

- ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
+ ssize_t err;

err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
@@ -2186,6 +2183,7 @@ generic_file_aio_write_nolock(struct kio
}
return ret;
}
+EXPORT_SYMBOL(generic_file_aio_write_nolock);

static ssize_t
__generic_file_write_nolock(struct file *file, const struct iovec *iov,
@@ -2195,8 +2193,9 @@ __generic_file_write_nolock(struct file
ssize_t ret;

init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (ret == -EIOCBQUEUED)
+ if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
return ret;
}
@@ -2209,28 +2208,27 @@ generic_file_write_nolock(struct file *f
ssize_t ret;

init_sync_kiocb(&kiocb, file);
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+ kiocb.ki_pos = *ppos;
+ ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}
EXPORT_SYMBOL(generic_file_write_nolock);

-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
- &iocb->ki_pos);
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
Index: linux-2.6.17-rc3.save/net/socket.c
===================================================================
--- linux-2.6.17-rc3.save.orig/net/socket.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/net/socket.c 2006-05-11 08:24:23.326478432 -0700
@@ -96,10 +96,10 @@
#include <linux/netfilter.h>

static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
- size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct * vma);

static int sock_close(struct inode *inode, struct file *file);
@@ -700,7 +700,7 @@ static ssize_t sock_sendpage(struct file
}

static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
- char __user *ubuf, size_t size, struct sock_iocb *siocb)
+ struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -710,15 +710,13 @@ static struct sock_iocb *alloc_sock_iocb
}

siocb->kiocb = iocb;
- siocb->async_iov.iov_base = ubuf;
- siocb->async_iov.iov_len = size;
-
iocb->private = siocb;
return siocb;
}

static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -749,31 +747,33 @@ static ssize_t sock_readv(struct file *f
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}

static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -806,28 +806,28 @@ static ssize_t sock_writev(struct file *
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;

- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}


Index: linux-2.6.17-rc3.save/drivers/usb/gadget/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/usb/gadget/inode.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/usb/gadget/inode.c 2006-05-11 08:25:14.479701960 -0700
@@ -528,7 +528,8 @@ struct kiocb_priv {
struct usb_request *req;
struct ep_data *epdata;
void *buf;
- char __user *ubuf;
+ struct iovec *iv;
+ unsigned long count;
unsigned actual;
};

@@ -556,18 +557,32 @@ static int ep_aio_cancel(struct kiocb *i
static ssize_t ep_aio_read_retry(struct kiocb *iocb)
{
struct kiocb_priv *priv = iocb->private;
- ssize_t status = priv->actual;
+ ssize_t len, total;

/* we "retry" to get the right mm context for this: */
- status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
- if (unlikely(0 != status))
- status = -EFAULT;
- else
- status = priv->actual;
+
+ /* copy stuff into user buffers */
+ total = priv->actual;
+ len = 0;
+ for (i=0; i < priv->count; i++) {
+ ssize_t this = min(priv->iv[i].iov_len, total);
+
+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
+ break;
+
+ total -= this;
+ len += this;
+ if (total <= 0)
+ break;
+ }
+
+ if (unlikely(len == 0))
+ len = -EFAULT;
+
kfree(priv->buf);
kfree(priv);
aio_put_req(iocb);
- return status;
+ return len;
}

static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -615,7 +630,8 @@ ep_aio_rwtail(
char *buf,
size_t len,
struct ep_data *epdata,
- char __user *ubuf
+ const struct iovec *iv,
+ unsigned long count
)
{
struct kiocb_priv *priv = (void *) &iocb->private;
@@ -630,7 +646,8 @@ fail:
return value;
}
iocb->private = priv;
- priv->ubuf = ubuf;
+ priv->iovec = iv;
+ priv->count = count;

value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
if (unlikely(value < 0)) {
@@ -675,36 +692,52 @@ fail:
}

static ssize_t
-ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o)
+ep_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len;
+ int i = 0;
+ ssize_t ret;

if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
+
iocb->ki_retry = ep_aio_read_retry;
- return ep_aio_rwtail(iocb, buf, len, epdata, ubuf);
+ return ep_aio_rwtail(iocb, buf, len, epdata, iv, count);
}

static ssize_t
-ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o)
+ep_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len = 0;
+ int i = 0;
+ ssize_t ret;

if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN)))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
- if (unlikely(copy_from_user(buf, ubuf, len) != 0)) {
- kfree(buf);
- return -EFAULT;
+
+ for (i=0; i < count; i++) {
+ if (unlikely(copy_from_user(&buf[len], iv[i]->iov_base,
+ iv[i]->iov_len) != 0)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+ len += iv[i]->iov_len;
}
- return ep_aio_rwtail(iocb, buf, len, epdata, NULL);
+ return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
}

/*----------------------------------------------------------------------*/
Index: linux-2.6.17-rc3.save/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/aio.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/aio.h 2006-05-11 08:23:43.843480760 -0700
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/aio_abi.h>
+#include <linux/uio.h>

#include <asm/atomic.h>

@@ -112,6 +113,7 @@ struct kiocb {
long ki_retried; /* just for testing */
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
+ struct iovec ki_inline_vec; /* inline vector */

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc3.save/fs/nfs/direct.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/nfs/direct.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/nfs/direct.c 2006-05-02 08:31:58.000000000 -0700
@@ -745,8 +745,8 @@ static ssize_t nfs_direct_write(struct k
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * @count: number of bytes to read
+ * @iov: vector of user buffers into which to read data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
@@ -763,19 +763,25 @@ static ssize_t nfs_direct_write(struct k
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval = -EINVAL;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
if (count < 0)
goto out;
retval = -EFAULT;
@@ -807,8 +813,8 @@ out:
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * @count: number of bytes to write
+ * @iov: vector of user buffers from which to write data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -829,19 +835,25 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
ssize_t retval;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
Index: linux-2.6.17-rc3.save/fs/nfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/nfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/nfs/file.c 2006-05-02 08:31:58.000000000 -0700
@@ -41,8 +41,8 @@ static int nfs_file_release(struct inode
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
-static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
@@ -53,8 +53,8 @@ const struct file_operations nfs_file_op
.llseek = nfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -212,26 +212,30 @@ nfs_file_flush(struct file *file)
return status;
}

-static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, buf, count, pos);
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+ dfprintk(VFS, "nfs: read(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long) count, (long long) pos);

result = nfs_revalidate_file(inode, iocb->ki_filp);
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
- result = generic_file_aio_read(iocb, buf, count, pos);
+ result = generic_file_aio_read(iocb, iov, nr_segs, pos);
return result;
}

@@ -343,24 +347,25 @@ struct address_space_operations nfs_file
#endif
};

-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ unsigned long seg;
+ size_t count = 0;
+
+ for (seg = 0; seg < nr_segs; seg++)
+ count += iov[seg].iov_len;

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, buf, count, pos);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+ dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- inode->i_ino, (unsigned long) count, (unsigned long) pos);
+ inode->i_ino, (unsigned long) count, (long long) pos);

result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -380,7 +385,7 @@ nfs_file_write(struct kiocb *iocb, const
goto out;

nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
- result = generic_file_aio_write(iocb, buf, count, pos);
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
out:
return result;

Index: linux-2.6.17-rc3.save/include/linux/nfs_fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/nfs_fs.h 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/nfs_fs.h 2006-05-02 08:31:58.000000000 -0700
@@ -359,10 +359,10 @@ extern int nfs3_removexattr (struct dent
*/
extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
- size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos);
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);

/*
* linux/fs/nfs/dir.c
Index: linux-2.6.17-rc3.save/fs/ocfs2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ocfs2/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ocfs2/file.c 2006-05-05 13:36:49.000000000 -0700
@@ -929,25 +929,23 @@ static inline int ocfs2_write_should_rem
}

static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
u32 clusters;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
loff_t newsize, saved_pos;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

/* happy write of zero bytes */
- if (count == 0)
+ if (iocb->ki_left == 0)
return 0;

if (!inode) {
@@ -1016,7 +1014,7 @@ static ssize_t ocfs2_file_aio_write(stru
} else {
saved_pos = iocb->ki_pos;
}
- newsize = count + saved_pos;
+ newsize = iocb->ki_left + saved_pos;

mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
(long long) saved_pos, (long long) newsize,
@@ -1059,7 +1057,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* Fill any holes which would've been created by this
* write. If we're O_APPEND, this will wind up
* (correctly) being a noop. */
- ret = ocfs2_zero_extend(inode, (u64) newsize - count);
+ ret = ocfs2_zero_extend(inode, (u64) newsize - iocb->ki_left);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1075,7 +1073,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb);

- ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);

/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -1109,16 +1107,16 @@ out:
}

static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
int ret = 0, rw_level = -1, have_alloc_sem = 0;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

@@ -1146,7 +1144,7 @@ static ssize_t ocfs2_file_aio_read(struc
ocfs2_iocb_set_rw_locked(iocb);
}

- ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
+ ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
if (ret == -EINVAL)
mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");

Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-02 08:28:50.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-11 08:24:23.323478888 -0700
@@ -2174,20 +2174,18 @@ out:
/**
* ntfs_file_aio_write -
*/
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = sync_page_range(inode, mapping, pos, ret);


2006-05-11 15:41:14

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead.

This patch removes the readv() and writev() methods and replaces
them with the aio_read()/aio_write() methods.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

drivers/char/raw.c | 2
drivers/net/tun.c | 35 +++------------
fs/bad_inode.c | 2
fs/block_dev.c | 2
fs/cifs/cifsfs.c | 16 ------
fs/compat.c | 44 ++++---------------
fs/ext2/file.c | 2
fs/ext3/file.c | 2
fs/fat/file.c | 2
fs/fuse/dev.c | 35 +++------------
fs/hostfs/hostfs_kern.c | 2
fs/jfs/file.c | 2
fs/ntfs/file.c | 2
fs/pipe.c | 51 ++++++----------------
fs/read_write.c | 101 +++++++++++++++++++++++++++++---------------
fs/read_write.h | 14 ++++++
fs/xfs/linux-2.6/xfs_file.c | 92 ----------------------------------------
include/linux/fs.h | 6 --
mm/filemap.c | 36 ---------------
net/socket.c | 40 -----------------
sound/core/pcm_native.c | 40 ++++++++---------
21 files changed, 142 insertions(+), 386 deletions(-)

Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-11 08:22:04.192629992 -0700
@@ -258,8 +258,6 @@ static struct file_operations raw_fops =
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.owner = THIS_MODULE,
};

Index: linux-2.6.17-rc3.save/drivers/net/tun.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/net/tun.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/net/tun.c 2006-05-10 08:23:47.000000000 -0700
@@ -289,11 +289,10 @@ static inline size_t iov_total(const str
return len;
}

-/* Writev */
-static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
- struct tun_struct *tun = file->private_data;
+ struct tun_struct *tun = iocb->ki_filp->private_data;

if (!tun)
return -EBADFD;
@@ -303,14 +302,6 @@ static ssize_t tun_chr_writev(struct fil
return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}

-/* Write */
-static ssize_t tun_chr_write(struct file * file, const char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { (void __user *) buf, count };
- return tun_chr_writev(file, &iv, 1, pos);
-}
-
/* Put packet to the user space buffer */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
struct sk_buff *skb,
@@ -344,10 +335,10 @@ static __inline__ ssize_t tun_put_user(s
return total;
}

-/* Readv */
-static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
+ struct file *file = iocb->ki_filp;
struct tun_struct *tun = file->private_data;
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
@@ -427,14 +418,6 @@ static ssize_t tun_chr_readv(struct file
return ret;
}

-/* Read */
-static ssize_t tun_chr_read(struct file * file, char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { buf, count };
- return tun_chr_readv(file, &iv, 1, pos);
-}
-
static void tun_setup(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -762,10 +745,8 @@ static int tun_chr_close(struct inode *i
static struct file_operations tun_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = tun_chr_read,
- .readv = tun_chr_readv,
- .write = tun_chr_write,
- .writev = tun_chr_writev,
+ .aio_read = tun_chr_aio_read,
+ .aio_write = tun_chr_aio_write,
.poll = tun_chr_poll,
.ioctl = tun_chr_ioctl,
.open = tun_chr_open,
Index: linux-2.6.17-rc3.save/fs/bad_inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/bad_inode.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/bad_inode.c 2006-05-10 08:23:47.000000000 -0700
@@ -40,8 +40,6 @@ static const struct file_operations bad_
.aio_fsync = EIO_ERROR,
.fasync = EIO_ERROR,
.lock = EIO_ERROR,
- .readv = EIO_ERROR,
- .writev = EIO_ERROR,
.sendfile = EIO_ERROR,
.sendpage = EIO_ERROR,
.get_unmapped_area = EIO_ERROR,
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-11 08:22:04.189630448 -0700
@@ -1093,8 +1093,6 @@ const struct file_operations def_blk_fop
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .readv = generic_file_readv,
- .writev = generic_file_write_nolock,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc3.save/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/cifs/cifsfs.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/cifs/cifsfs.c 2006-05-10 08:23:47.000000000 -0700
@@ -484,18 +484,6 @@ cifs_get_sb(struct file_system_type *fs_
return sb;
}

-static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct inode *inode = file->f_dentry->d_inode;
- ssize_t written;
-
- written = generic_file_writev(file, iov, nr_segs, ppos);
- if (!CIFS_I(inode)->clientCanCacheAll)
- filemap_fdatawrite(inode->i_mapping);
- return written;
-}
-
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -581,8 +569,6 @@ struct inode_operations cifs_symlink_ino
const struct file_operations cifs_file_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
@@ -624,8 +610,6 @@ const struct file_operations cifs_file_d
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
Index: linux-2.6.17-rc3.save/fs/compat.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/compat.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/compat.c 2006-05-10 08:23:47.000000000 -0700
@@ -55,6 +55,8 @@

extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);

+#include "read_write.h"
+
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
@@ -1139,9 +1141,6 @@ static ssize_t compat_do_readv_writev(in
const struct compat_iovec __user *uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov=iovstack, *vector;
@@ -1224,39 +1223,18 @@ static ssize_t compat_do_readv_writev(in
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -1284,7 +1262,7 @@ compat_sys_readv(unsigned long fd, const
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
goto out;

ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
@@ -1307,7 +1285,7 @@ compat_sys_writev(unsigned long fd, cons
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
goto out;

ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
Index: linux-2.6.17-rc3.save/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext2/file.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext2/file.c 2006-05-11 08:22:04.187630752 -0700
@@ -50,8 +50,6 @@ const struct file_operations ext2_file_o
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc3.save/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext3/file.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext3/file.c 2006-05-10 08:23:47.000000000 -0700
@@ -112,8 +112,6 @@ const struct file_operations ext3_file_o
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.ioctl = ext3_ioctl,
.mmap = generic_file_mmap,
.open = generic_file_open,
Index: linux-2.6.17-rc3.save/fs/fat/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fat/file.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fat/file.c 2006-05-10 08:23:47.000000000 -0700
@@ -116,8 +116,6 @@ const struct file_operations fat_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/fuse/dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fuse/dev.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fuse/dev.c 2006-05-10 08:23:47.000000000 -0700
@@ -585,14 +585,15 @@ static void request_wait(struct fuse_con
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_copy_state cs;
unsigned reqsize;
+ struct file *file = iocb->ki_filp;
struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
return -EPERM;
@@ -658,15 +659,6 @@ static ssize_t fuse_dev_readv(struct fil
return err;
}

-static ssize_t fuse_dev_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = buf;
- return fuse_dev_readv(file, &iov, 1, off);
-}
-
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
@@ -711,15 +703,15 @@ static int copy_out_args(struct fuse_cop
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
unsigned nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(file);
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
if (!fc)
return -EPERM;

@@ -779,15 +771,6 @@ static ssize_t fuse_dev_writev(struct fi
return err;
}

-static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = (char __user *) buf;
- return fuse_dev_writev(file, &iov, 1, off);
-}
-
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -921,10 +904,8 @@ static int fuse_dev_fasync(int fd, struc
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = fuse_dev_read,
- .readv = fuse_dev_readv,
- .write = fuse_dev_write,
- .writev = fuse_dev_writev,
+ .aio_read = fuse_dev_read,
+ .aio_write = fuse_dev_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
Index: linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hostfs/hostfs_kern.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c 2006-05-11 08:22:04.191630144 -0700
@@ -390,8 +390,6 @@ static const struct file_operations host
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.write = generic_file_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
Index: linux-2.6.17-rc3.save/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jfs/file.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jfs/file.c 2006-05-11 08:22:04.186630904 -0700
@@ -108,8 +108,6 @@ const struct file_operations jfs_file_op
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.fsync = jfs_fsync,
.release = jfs_release,
Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-11 08:22:04.188630600 -0700
@@ -2296,11 +2296,9 @@ const struct file_operations ntfs_file_o
.llseek = generic_file_llseek, /* Seek inside file. */
.read = generic_file_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
.aio_write = ntfs_file_aio_write, /* Async write to file. */
- .writev = ntfs_file_writev, /* Write to file. */
/*.release = ,*/ /* Last file is closed. See
fs/ext2/file.c::
ext2_release_file() for
Index: linux-2.6.17-rc3.save/fs/pipe.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/pipe.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/pipe.c 2006-05-10 08:24:51.000000000 -0700
@@ -147,9 +147,10 @@ static struct pipe_buf_operations anon_p
};

static ssize_t
-pipe_readv(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_read(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
int do_wakeup;
@@ -248,17 +249,10 @@ pipe_readv(struct file *filp, const stru
}

static ssize_t
-pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = buf, .iov_len = count };
-
- return pipe_readv(filp, &iov, 1, ppos);
-}
-
-static ssize_t
-pipe_writev(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_write(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t ppos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
ssize_t ret;
@@ -404,15 +398,6 @@ out:
}

static ssize_t
-pipe_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return pipe_writev(filp, &iov, 1, ppos);
-}
-
-static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
return -EBADF;
@@ -630,8 +615,7 @@ pipe_rdwr_open(struct inode *inode, stru
*/
const struct file_operations read_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -643,8 +627,7 @@ const struct file_operations read_fifo_f
const struct file_operations write_fifo_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -654,10 +637,8 @@ const struct file_operations write_fifo_

const struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
@@ -667,8 +648,7 @@ const struct file_operations rdwr_fifo_f

static struct file_operations read_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -680,8 +660,7 @@ static struct file_operations read_pipe_
static struct file_operations write_pipe_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -691,10 +670,8 @@ static struct file_operations write_pipe

static struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-11 08:23:43.843480760 -0700
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -450,6 +451,63 @@ unsigned long iov_shorten(struct iovec *

EXPORT_SYMBOL(iov_shorten);

+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
+
+ for (;;) {
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
+ wait_on_retry_sync_kiocb(&kiocb);
+ }
+
+ if (ret == -EIOCBQUEUED)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
+/* Do it by hand, with file-ops */
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+{
+ struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+
+ while (nr_segs > 0) {
+ void __user * base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ nr = fn(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -457,12 +515,9 @@ static ssize_t do_readv_writev(int type,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *vector;
+ struct iovec *iov = iovstack;
ssize_t ret;
int seg;
io_fn_t fn;
@@ -532,39 +587,17 @@ static ssize_t do_readv_writev(int type,
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -585,7 +618,7 @@ ssize_t vfs_readv(struct file *file, con
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;

return do_readv_writev(READ, file, vec, vlen, pos);
@@ -598,7 +631,7 @@ ssize_t vfs_writev(struct file *file, co
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;

return do_readv_writev(WRITE, file, vec, vlen, pos);
Index: linux-2.6.17-rc3.save/fs/read_write.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc3.save/fs/read_write.h 2006-05-10 08:23:47.000000000 -0700
@@ -0,0 +1,14 @@
+/*
+ * This file is only for sharing some helpers from read_write.c with compat.c.
+ * Don't use anywhere else.
+ */
+
+
+typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
Index: linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/xfs/linux-2.6/xfs_file.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_file.c 2006-05-10 08:23:47.000000000 -0700
@@ -129,94 +129,6 @@ xfs_file_aio_write_invis(
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

-STATIC inline ssize_t
-__xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC inline ssize_t
-__xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
STATIC ssize_t
xfs_file_sendfile(
struct file *filp,
@@ -577,8 +489,6 @@ const struct file_operations xfs_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv,
- .writev = xfs_file_writev,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.sendfile = xfs_file_sendfile,
@@ -601,8 +511,6 @@ const struct file_operations xfs_invis_f
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv_invis,
- .writev = xfs_file_writev_invis,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.sendfile = xfs_file_sendfile_invis,
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-11 08:23:43.843480760 -0700
@@ -1031,8 +1031,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -1624,10 +1622,6 @@ extern long do_splice_direct(struct file

extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
-ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-11 08:22:04.192629992 -0700
@@ -2266,42 +2266,6 @@ ssize_t generic_file_write(struct file *
}
EXPORT_SYMBOL(generic_file_write);

-ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-EXPORT_SYMBOL(generic_file_readv);
-
-ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_writev);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc3.save/net/socket.c
===================================================================
--- linux-2.6.17-rc3.save.orig/net/socket.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/net/socket.c 2006-05-10 08:23:47.000000000 -0700
@@ -112,10 +112,6 @@ static long compat_sock_ioctl(struct fil
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);

@@ -138,8 +134,6 @@ static struct file_operations socket_fil
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
- .readv = sock_readv,
- .writev = sock_writev,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
@@ -736,23 +730,6 @@ static ssize_t do_sock_read(struct msghd
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}

-static ssize_t sock_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb iocb;
- struct sock_iocb siocb;
- struct msghdr msg;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -795,23 +772,6 @@ static ssize_t do_sock_write(struct msgh
return __sock_sendmsg(iocb, sock, msg, size);
}

-static ssize_t sock_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct msghdr msg;
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
Index: linux-2.6.17-rc3.save/sound/core/pcm_native.c
===================================================================
--- linux-2.6.17-rc3.save.orig/sound/core/pcm_native.c 2006-05-10 08:21:49.000000000 -0700
+++ linux-2.6.17-rc3.save/sound/core/pcm_native.c 2006-05-10 08:23:47.000000000 -0700
@@ -2819,8 +2819,8 @@ static ssize_t snd_pcm_write(struct file
return result;
}

-static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)

{
struct snd_pcm_file *pcm_file;
@@ -2831,22 +2831,22 @@ static ssize_t snd_pcm_readv(struct file
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, return -ENXIO);
runtime = substream->runtime;
if (runtime->status->state == SNDRV_PCM_STATE_OPEN)
return -EBADFD;
- if (count > 1024 || count != runtime->channels)
+ if (nr_segs > 1024 || nr_segs != runtime->channels)
return -EINVAL;
- if (!frame_aligned(runtime, _vector->iov_len))
+ if (!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_readv(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -2854,8 +2854,8 @@ static ssize_t snd_pcm_readv(struct file
return result;
}

-static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct snd_pcm_file *pcm_file;
struct snd_pcm_substream *substream;
@@ -2865,7 +2865,7 @@ static ssize_t snd_pcm_writev(struct fil
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, result = -ENXIO; goto end);
runtime = substream->runtime;
@@ -2873,17 +2873,17 @@ static ssize_t snd_pcm_writev(struct fil
result = -EBADFD;
goto end;
}
- if (count > 128 || count != runtime->channels ||
- !frame_aligned(runtime, _vector->iov_len)) {
+ if (nr_segs > 128 || nr_segs != runtime->channels ||
+ !frame_aligned(runtime, iov->iov_len)) {
result = -EINVAL;
goto end;
}
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_writev(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -3389,7 +3389,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.write = snd_pcm_write,
- .writev = snd_pcm_writev,
+ .aio_write = snd_pcm_aio_write,
.open = snd_pcm_playback_open,
.release = snd_pcm_release,
.poll = snd_pcm_playback_poll,
@@ -3401,7 +3401,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.read = snd_pcm_read,
- .readv = snd_pcm_readv,
+ .aio_read = snd_pcm_aio_read,
.open = snd_pcm_capture_open,
.release = snd_pcm_release,
.poll = snd_pcm_capture_poll,


2006-05-11 15:41:16

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 3/4] Core aio changes to support vectored AIO

This work was initially done by Zach Brown to add support for
vectored aio. These are the core changes for AIO to support
IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.

I made a few extra changes beyond Zach's work. They are:
- took out aio_pread/aio_pwrite and made them
a special case of the vectored support
- added single inlined vector to save on kmalloc()
for a simple aio_read/aio_write
- kiocb->ki_left always indicates the amount of
IO that still needs to be done. Made sure that this gets
set in the sync case also, so that we don't need
to loop over iovecs to figure out the IO size all
the time.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Zach Brown <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Benjamin LaHaise <[email protected]>

fs/aio.c | 165 +++++++++++++++++++++++++++++++++---------------
fs/read_write.c | 127 +++++++++++++++++++++---------------
include/linux/aio.h | 4 +
include/linux/aio_abi.h | 2
include/linux/fs.h | 5 +
5 files changed, 199 insertions(+), 104 deletions(-)

Index: linux-2.6.17-rc3.save/fs/aio.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/aio.c 2006-05-10 08:21:48.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/aio.c 2006-05-11 08:22:27.317114536 -0700
@@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_
req->ki_retry = NULL;
req->ki_dtor = NULL;
req->private = NULL;
+ req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);

/* Check if the completion queue has enough free space to
@@ -460,6 +461,8 @@ static inline void really_put_req(struct

if (req->ki_dtor)
req->ki_dtor(req);
+ if (req->ki_iovec != &req->ki_inline_vec)
+ kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
ctx->reqs_active--;

@@ -1301,69 +1304,63 @@ asmlinkage long sys_io_destroy(aio_conte
return -EINVAL;
}

-/*
- * aio_p{read,write} are the default ki_retry methods for
- * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially
- * multiple calls to f_op->aio_read(). They loop around partial progress
- * instead of returning -EIOCBRETRY because they don't have the means to call
- * kick_iocb().
- */
-static ssize_t aio_pread(struct kiocb *iocb)
+static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret = 0;
+ struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];

- do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ BUG_ON(ret <= 0);

- ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- /*
- * Can't just depend on iocb->ki_left to determine
- * whether we are done. This may have been a short read.
- */
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
+ while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
+ ssize_t this = min(iov->iov_len, ret);
+ iov->iov_base += this;
+ iov->iov_len -= this;
+ iocb->ki_left -= this;
+ ret -= this;
+ if (iov->iov_len == 0) {
+ iocb->ki_cur_seg++;
+ iov++;
}
+ }

- /*
- * For pipes and sockets we return once we have some data; for
- * regular files we retry till we complete the entire read or
- * find that we can't read any more data (e.g short reads).
- */
- } while (ret > 0 && iocb->ki_left > 0 &&
- !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
-
- /* This means we must have transferred all that we could */
- /* No need to retry anymore */
- if ((ret == 0) || (iocb->ki_left == 0))
- ret = iocb->ki_nbytes - iocb->ki_left;
-
- return ret;
+ /* the caller should not have done more io than what fit in
+ * the remaining iovecs */
+ BUG_ON(ret > 0 && iocb->ki_left == 0);
}

-/* see aio_pread() */
-static ssize_t aio_pwrite(struct kiocb *iocb)
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
ssize_t ret = 0;
+ unsigned short opcode;
+
+ if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
+ (iocb->ki_opcode == IOCB_CMD_PREAD)) {
+ rw_op = file->f_op->aio_read;
+ opcode = IOCB_CMD_PREADV;
+ } else {
+ rw_op = file->f_op->aio_write;
+ opcode = IOCB_CMD_PWRITEV;
+ }

do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
+ iocb->ki_nr_segs - iocb->ki_cur_seg,
+ iocb->ki_pos);
+ if (ret > 0)
+ aio_advance_iovec(iocb, ret);

- ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
- }
- } while (ret > 0 && iocb->ki_left > 0);
+ /* retry all partial writes. retry partial reads as long as its a
+ * regular file. */
+ } while (ret > 0 && iocb->ki_left > 0 &&
+ (opcode == IOCB_CMD_PWRITEV ||
+ (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));

+ /* This means we must have transferred all that we could */
+ /* No need to retry anymore */
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;

@@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *i
return ret;
}

+static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb)
+{
+ ssize_t ret;
+
+ ret = rw_copy_check_uvector((struct iovec __user *)kiocb->ki_buf,
+ kiocb->ki_nbytes, 1,
+ &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+ if (ret < 0)
+ goto out;
+
+ kiocb->ki_nr_segs = kiocb->ki_nbytes;
+ kiocb->ki_cur_seg = 0;
+ /* ki_nbytes/left now reflect bytes instead of segs */
+ kiocb->ki_nbytes = ret;
+ kiocb->ki_left = ret;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+{
+ kiocb->ki_iovec = &kiocb->ki_inline_vec;
+ kiocb->ki_iovec->iov_base = kiocb->ki_buf;
+ kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_nr_segs = 1;
+ kiocb->ki_cur_seg = 0;
+ kiocb->ki_nbytes = kiocb->ki_left;
+ return 0;
+}
+
/*
* aio_setup_iocb:
* Performs the initial checks and aio retry method
@@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_READ);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_read)
- kiocb->ki_retry = aio_pread;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PWRITE:
ret = -EBADF;
@@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_WRITE);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_write)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PREADV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ break;
+ ret = security_file_permission(file, MAY_READ);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_read)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PWRITEV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ break;
+ ret = security_file_permission(file, MAY_WRITE);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_write)
- kiocb->ki_retry = aio_pwrite;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_FDSYNC:
ret = -EINVAL;
Index: linux-2.6.17-rc3.save/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/aio.h 2006-05-10 08:21:48.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/aio.h 2006-05-10 08:29:26.000000000 -0700
@@ -7,6 +7,7 @@
#include <linux/uio.h>

#include <asm/atomic.h>
+#include <linux/uio.h>

#define AIO_MAXSEGS 4
#define AIO_KIOGRP_NR_ATOMIC 8
@@ -114,6 +115,9 @@ struct kiocb {
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
struct iovec ki_inline_vec; /* inline vector */
+ struct iovec *ki_iovec;
+ unsigned long ki_nr_segs;
+ unsigned long ki_cur_seg;

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc3.save/include/linux/aio_abi.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/aio_abi.h 2006-05-10 08:21:48.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/aio_abi.h 2006-05-10 08:29:26.000000000 -0700
@@ -41,6 +41,8 @@ enum {
* IOCB_CMD_POLL = 5,
*/
IOCB_CMD_NOOP = 6,
+ IOCB_CMD_PREADV = 7,
+ IOCB_CMD_PWRITEV = 8,
};

/* read() from /dev/aio returns these structures. */
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-11 08:22:04.191630144 -0700
@@ -508,6 +508,74 @@ ssize_t do_loop_readv_writev(struct file
return ret;
}

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer)
+ {
+ unsigned long seg;
+ ssize_t ret;
+ struct iovec *iov = fast_pointer;
+
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
+ if (nr_segs == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
+ if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_segs > fast_segs) {
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * According to the Single Unix Specification we should return EINVAL
+ * if an element length is < 0 when cast to ssize_t or if the
+ * total length would overflow the ssize_t return value of the
+ * system call.
+ */
+ ret = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
+
+ /* see if we we're about to use an invalid len or if
+ * it's about to overflow ssize_t */
+ if (len < 0 || (ret + len < ret)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret += len;
+ }
+out:
+ *ret_pointer = iov;
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -519,64 +587,20 @@ static ssize_t do_readv_writev(int type,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
ssize_t ret;
- int seg;
io_fn_t fn;
iov_fn_t fnv;

- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
- ret = 0;
- if (nr_segs == 0)
+ if (!file->f_op) {
+ ret = -EINVAL;
goto out;
-
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
- ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV)
- goto out;
- if (!file->f_op)
- goto out;
- if (nr_segs > UIO_FASTIOV) {
- ret = -ENOMEM;
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
- if (!iov)
- goto out;
}
- ret = -EFAULT;
- if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector)))
- goto out;

- /*
- * Single unix specification:
- * We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t. The total length is fitting an ssize_t
- *
- * Be careful here because iov_len is a size_t not an ssize_t
- */
- tot_len = 0;
- ret = -EINVAL;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
-
- if (len < 0) /* size_t not fitting an ssize_t .. */
- goto out;
- if (unlikely(!access_ok(vrfy_dir(type), buf, len)))
- goto Efault;
- tot_len += len;
- if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */
- goto out;
- }
- if (tot_len == 0) {
- ret = 0;
+ ret = rw_copy_check_uvector(uvector, nr_segs, ARRAY_SIZE(iovstack),
+ iovstack, &iov);
+ if (ret <= 0)
goto out;
- }

+ tot_len = ret;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
@@ -608,9 +632,6 @@ out:
fsnotify_modify(file->f_dentry);
}
return ret;
-Efault:
- ret = -EFAULT;
- goto out;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-11 08:22:04.192629992 -0700
@@ -1068,6 +1068,11 @@ struct inode_operations {

struct seq_file;

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer);
+
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,


2006-05-11 15:42:29

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

This patch cleans up generic_file_*_read/write() interfaces.
Christoph Hellwig gave me the idea for these cleanups.

In a nutshell, all filesystems should set the .aio_read/.aio_write
methods and use do_sync_read()/do_sync_write() as their .read/.write
methods. This allows us to clean up all variants of the generic_file_*
routines.

Final available interfaces:

generic_file_aio_read() - read handler
generic_file_aio_write() - write handler
generic_file_aio_write_nolock() - no lock write handler

__generic_file_aio_write_nolock() - internal worker routine


Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: ??? Christoph Hellwig ???

drivers/char/raw.c | 15 +------
fs/adfs/file.c | 6 ++-
fs/affs/file.c | 6 ++-
fs/bfs/file.c | 6 ++-
fs/block_dev.c | 12 +-----
fs/ext2/file.c | 4 +-
fs/fuse/file.c | 6 ++-
fs/hfs/inode.c | 6 ++-
fs/hfsplus/inode.c | 6 ++-
fs/hostfs/hostfs_kern.c | 4 +-
fs/hpfs/file.c | 6 ++-
fs/jffs/inode-v23.c | 6 ++-
fs/jffs2/file.c | 6 ++-
fs/jfs/file.c | 4 +-
fs/minix/file.c | 6 ++-
fs/ntfs/file.c | 2 -
fs/qnx4/file.c | 6 ++-
fs/ramfs/file-mmu.c | 6 ++-
fs/ramfs/file-nommu.c | 6 ++-
fs/read_write.c | 3 +
fs/xfs/linux-2.6/xfs_lrw.c | 4 +-
include/linux/fs.h | 5 --
mm/filemap.c | 88 ++-------------------------------------------
23 files changed, 72 insertions(+), 147 deletions(-)

Index: linux-2.6.17-rc3.save/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/drivers/char/raw.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/drivers/char/raw.c 2006-05-10 08:29:35.000000000 -0700
@@ -239,21 +239,10 @@ out:
return err;
}

-static ssize_t raw_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_write_nolock(file, &local_iov, 1, ppos);
-}
-
static struct file_operations raw_fops = {
- .read = generic_file_read,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
- .write = raw_file_write,
+ .write = do_sync_write,
.aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
Index: linux-2.6.17-rc3.save/fs/adfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/adfs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/adfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -27,10 +27,12 @@

const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.fsync = file_fsync,
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc3.save/fs/affs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/affs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/affs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -27,8 +27,10 @@ static int affs_file_release(struct inod

const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = affs_file_open,
.release = affs_file_release,
Index: linux-2.6.17-rc3.save/fs/bfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/bfs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/bfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -19,8 +19,10 @@

const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc3.save/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/block_dev.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/block_dev.c 2006-05-10 08:29:35.000000000 -0700
@@ -1056,14 +1056,6 @@ static int blkdev_close(struct inode * i
return blkdev_put(bdev);
}

-static ssize_t blkdev_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_write_nolock(file, &local_iov, 1, ppos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1083,8 +1075,8 @@ const struct file_operations def_blk_fop
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = generic_file_read,
- .write = blkdev_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ext2/file.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ext2/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -41,8 +41,8 @@ static int ext2_release_file (struct ino
*/
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.ioctl = ext2_ioctl,
Index: linux-2.6.17-rc3.save/fs/fuse/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/fuse/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/fuse/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -621,8 +621,10 @@ static int fuse_set_page_dirty(struct pa

static const struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
Index: linux-2.6.17-rc3.save/fs/hfs/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hfs/inode.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hfs/inode.c 2006-05-10 08:29:35.000000000 -0700
@@ -603,8 +603,10 @@ int hfs_inode_setattr(struct dentry *den

static const struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc3.save/fs/hfsplus/inode.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hfsplus/inode.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hfsplus/inode.c 2006-05-10 08:29:35.000000000 -0700
@@ -282,8 +282,10 @@ static struct inode_operations hfsplus_f

static const struct file_operations hfsplus_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hostfs/hostfs_kern.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hostfs/hostfs_kern.c 2006-05-10 08:29:35.000000000 -0700
@@ -386,11 +386,11 @@ int hostfs_fsync(struct file *file, stru

static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .write = generic_file_write,
+ .write = do_sync_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
.release = NULL,
Index: linux-2.6.17-rc3.save/fs/hpfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/hpfs/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/hpfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -113,7 +113,7 @@ static ssize_t hpfs_file_write(struct fi
{
ssize_t retval;

- retval = generic_file_write(file, buf, count, ppos);
+ retval = do_sync_write(file, buf, count, ppos);
if (retval > 0)
hpfs_i(file->f_dentry->d_inode)->i_dirty = 1;
return retval;
@@ -122,8 +122,10 @@ static ssize_t hpfs_file_write(struct fi
const struct file_operations hpfs_file_ops =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.write = hpfs_file_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
Index: linux-2.6.17-rc3.save/fs/jffs/inode-v23.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jffs/inode-v23.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jffs/inode-v23.c 2006-05-10 08:29:35.000000000 -0700
@@ -1633,8 +1633,10 @@ static const struct file_operations jffs
{
.open = generic_file_open,
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs_fsync,
Index: linux-2.6.17-rc3.save/fs/jffs2/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jffs2/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jffs2/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -42,8 +42,10 @@ const struct file_operations jffs2_file_
{
.llseek = generic_file_llseek,
.open = generic_file_open,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
Index: linux-2.6.17-rc3.save/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/jfs/file.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/jfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -103,8 +103,8 @@ struct inode_operations jfs_file_inode_o
const struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
- .write = generic_file_write,
- .read = generic_file_read,
+ .write = do_sync_write,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc3.save/fs/minix/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/minix/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/minix/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -17,8 +17,10 @@ int minix_sync_file(struct file *, struc

const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = minix_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc3.save/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ntfs/file.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ntfs/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *

const struct file_operations ntfs_file_ops = {
.llseek = generic_file_llseek, /* Seek inside file. */
- .read = generic_file_read, /* Read from file. */
+ .read = do_sync_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
Index: linux-2.6.17-rc3.save/fs/qnx4/file.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/qnx4/file.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/qnx4/file.c 2006-05-10 08:29:35.000000000 -0700
@@ -22,11 +22,13 @@
const struct file_operations qnx4_file_operations =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
#ifdef CONFIG_QNX4FS_RW
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = qnx4_sync_file,
#endif
};
Index: linux-2.6.17-rc3.save/fs/ramfs/file-mmu.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ramfs/file-mmu.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ramfs/file-mmu.c 2006-05-10 08:29:35.000000000 -0700
@@ -33,8 +33,10 @@ struct address_space_operations ramfs_ao
};

const struct file_operations ramfs_file_operations = {
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc3.save/fs/ramfs/file-nommu.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/ramfs/file-nommu.c 2006-05-10 08:21:47.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ramfs/file-nommu.c 2006-05-10 08:29:35.000000000 -0700
@@ -36,8 +36,10 @@ struct address_space_operations ramfs_ao
const struct file_operations ramfs_file_operations = {
.mmap = ramfs_nommu_mmap,
.get_unmapped_area = ramfs_nommu_get_unmapped_area,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
.llseek = generic_file_llseek,
Index: linux-2.6.17-rc3.save/fs/read_write.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/read_write.c 2006-05-10 08:29:26.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/read_write.c 2006-05-10 08:29:35.000000000 -0700
@@ -22,7 +22,8 @@

const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc3.save/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc3.save.orig/include/linux/fs.h 2006-05-10 08:29:26.000000000 -0700
+++ linux-2.6.17-rc3.save/include/linux/fs.h 2006-05-10 09:00:37.000000000 -0700
@@ -1594,11 +1594,8 @@ extern int generic_file_mmap(struct file
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
-extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
@@ -1608,8 +1605,6 @@ extern ssize_t generic_file_buffered_wri
unsigned long, loff_t, loff_t *, size_t, ssize_t);
extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
Index: linux-2.6.17-rc3.save/mm/filemap.c
===================================================================
--- linux-2.6.17-rc3.save.orig/mm/filemap.c 2006-05-10 08:23:47.000000000 -0700
+++ linux-2.6.17-rc3.save/mm/filemap.c 2006-05-10 08:44:01.000000000 -0700
@@ -1018,13 +1018,14 @@ success:
* that can use the page cache directly.
*/
ssize_t
-__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg;
size_t count;
+ loff_t *ppos = &iocb->ki_pos;

count = 0;
for (seg = 0; seg < nr_segs; seg++) {
@@ -1048,7 +1049,7 @@ __generic_file_aio_read(struct kiocb *io

/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (filp->f_flags & O_DIRECT) {
- loff_t pos = *ppos, size;
+ loff_t size;
struct address_space *mapping;
struct inode *inode;

@@ -1093,33 +1094,8 @@ out:
return retval;
}

-EXPORT_SYMBOL(__generic_file_aio_read);
-
-ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
-}
EXPORT_SYMBOL(generic_file_aio_read);

-ssize_t
-generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-EXPORT_SYMBOL(generic_file_read);
-
int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
ssize_t written;
@@ -2185,38 +2161,6 @@ ssize_t generic_file_aio_write_nolock(st
}
EXPORT_SYMBOL(generic_file_aio_write_nolock);

-static ssize_t
-__generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-ssize_t
-generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write_nolock);
-
ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -2242,30 +2186,6 @@ ssize_t generic_file_aio_write(struct ki
}
EXPORT_SYMBOL(generic_file_aio_write);

-ssize_t generic_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- ssize_t err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_lrw.c
===================================================================
--- linux-2.6.17-rc3.save.orig/fs/xfs/linux-2.6/xfs_lrw.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/xfs/linux-2.6/xfs_lrw.c 2006-05-10 08:45:52.000000000 -0700
@@ -276,7 +276,9 @@ xfs_read(

xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags);
- ret = __generic_file_aio_read(iocb, iovp, segs, offset);
+
+ iocb->ki_pos = *offset;
+ ret = generic_file_aio_read(iocb, iovp, segs, *offset);
if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
ret = wait_on_sync_kiocb(iocb);
if (ret > 0)


2006-05-11 18:37:06

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

Badari Pulavarty <[email protected]> wrote:
>
> This patch vectorizes aio_read() and aio_write() methods to prepare
> for collapsing all aio & vectored operations into one interface -
> which is aio_read()/aio_write().

There've been significant ocfs2 changes. I redid things as below, but
didn't try super-hard. Please check that it all looks sane.

diff -puN fs/ocfs2/file.c~vectorize-aio_read-aio_write-methods fs/ocfs2/file.c
--- 25/fs/ocfs2/file.c~vectorize-aio_read-aio_write-methods Thu May 11 11:33:39 2006
+++ 25-akpm/fs/ocfs2/file.c Thu May 11 11:36:12 2006
@@ -960,25 +960,23 @@ static inline int ocfs2_write_should_rem
}

static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
u32 clusters;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
loff_t newsize, saved_pos;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

/* happy write of zero bytes */
- if (count == 0)
+ if (iocb->ki_left == 0)
return 0;

if (!inode) {
@@ -1047,7 +1045,7 @@ static ssize_t ocfs2_file_aio_write(stru
} else {
saved_pos = iocb->ki_pos;
}
- newsize = count + saved_pos;
+ newsize = iocb->ki_left + saved_pos;

mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
(long long) saved_pos, (long long) newsize,
@@ -1080,7 +1078,7 @@ static ssize_t ocfs2_file_aio_write(stru
if (!clusters)
break;

- ret = ocfs2_extend_file(inode, NULL, newsize, count);
+ ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
@@ -1097,7 +1095,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb);

- ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);

/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -1131,16 +1129,16 @@ out:
}

static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
int ret = 0, rw_level = -1, have_alloc_sem = 0;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

@@ -1184,7 +1182,7 @@ static ssize_t ocfs2_file_aio_read(struc
}
ocfs2_meta_unlock(inode, 0);

- ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
+ ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
if (ret == -EINVAL)
mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");


2006-05-11 18:45:13

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

Badari Pulavarty <[email protected]> wrote:
>
> static ssize_t ep_aio_read_retry(struct kiocb *iocb)
> {
> struct kiocb_priv *priv = iocb->private;
> - ssize_t status = priv->actual;
> + ssize_t len, total;
>
> /* we "retry" to get the right mm context for this: */
> - status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
> - if (unlikely(0 != status))
> - status = -EFAULT;
> - else
> - status = priv->actual;
> +
> + /* copy stuff into user buffers */
> + total = priv->actual;
> + len = 0;
> + for (i=0; i < priv->count; i++) {
> + ssize_t this = min(priv->iv[i].iov_len, total);
> +
> + if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
> + break;
> +
> + total -= this;
> + len += this;
> + if (total <= 0)
> + break;
> + }
> +
> + if (unlikely(len == 0))
> + len = -EFAULT;

This is still wrong, isn't it? Or am I looking at the same patch?

There's no way in which `total' can go negative, so it'd be nicer to just
test it for equality with zero. Because if it goes unexpectedly negative,
we _want_ the kernel to malfunction, rather than mysteriously covering
things up.

The final test there should be

if (unlikely(total != 0))

yes?

2006-05-11 18:50:19

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

Badari Pulavarty <[email protected]> wrote:
>
> + size_t count = 0;
> +
> + for (seg = 0; seg < nr_segs; seg++)
> + count += iov[seg].iov_len;

We have iov_length() for this. pls review all patches, send updates if
appropriate.

2006-05-11 19:06:35

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

On Thu, 2006-05-11 at 11:47 -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > static ssize_t ep_aio_read_retry(struct kiocb *iocb)
> > {
> > struct kiocb_priv *priv = iocb->private;
> > - ssize_t status = priv->actual;
> > + ssize_t len, total;
> >
> > /* we "retry" to get the right mm context for this: */
> > - status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
> > - if (unlikely(0 != status))
> > - status = -EFAULT;
> > - else
> > - status = priv->actual;
> > +
> > + /* copy stuff into user buffers */
> > + total = priv->actual;
> > + len = 0;
> > + for (i=0; i < priv->count; i++) {
> > + ssize_t this = min(priv->iv[i].iov_len, total);
> > +
> > + if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
> > + break;
> > +
> > + total -= this;
> > + len += this;
> > + if (total <= 0)
> > + break;
> > + }
> > +
> > + if (unlikely(len == 0))
> > + len = -EFAULT;
>
> This is still wrong, isn't it? Or am I looking at the same patch?
>
> There's no way in which `total' can go negative, so it'd be nicer to just
> test it for equality with zero. Because if it goes unexpectedly negative,
> we _want_ the kernel to malfunction, rather than mysteriously covering
> things up.
>
> The final test there should be
>
> if (unlikely(total != 0))
>
> yes?

No. The original check is correct - we want to return EFAULT if
copy_to_user() failed and we haven't copied anything so far.
If we have copied anything so far, we should return that many bytes
(like a short I/O).


Thanks,
Badari

2006-05-11 19:11:10

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

On Thu, 2006-05-11 at 11:52 -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > + size_t count = 0;
> > +
> > + for (seg = 0; seg < nr_segs; seg++)
> > + count += iov[seg].iov_len;
>
> We have iov_length() for this. pls review all patches, send updates if
> appropriate.
>

Will do. That was temporarily added for handling NFS. Chuck needs
to re-write those portions to handle the vectors anyway.

Thanks,
Badari

2006-05-11 19:33:41

by Mark Fasheh

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

On Thu, May 11, 2006 at 11:39:32AM -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > This patch vectorizes aio_read() and aio_write() methods to prepare
> > for collapsing all aio & vectored operations into one interface -
> > which is aio_read()/aio_write().
>
> There've been significant ocfs2 changes. I redid things as below, but
> didn't try super-hard. Please check that it all looks sane.
Yeah, that looks good. Thanks Andrew!
--Mark

--
Mark Fasheh
Senior Software Developer, Oracle
[email protected]

2006-05-11 20:19:08

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

Badari Pulavarty <[email protected]> wrote:
>
> On Thu, 2006-05-11 at 11:47 -0700, Andrew Morton wrote:
> > Badari Pulavarty <[email protected]> wrote:
> > >
> > > static ssize_t ep_aio_read_retry(struct kiocb *iocb)
> > > {
> > > struct kiocb_priv *priv = iocb->private;
> > > - ssize_t status = priv->actual;
> > > + ssize_t len, total;
> > >
> > > /* we "retry" to get the right mm context for this: */
> > > - status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
> > > - if (unlikely(0 != status))
> > > - status = -EFAULT;
> > > - else
> > > - status = priv->actual;
> > > +
> > > + /* copy stuff into user buffers */
> > > + total = priv->actual;
> > > + len = 0;
> > > + for (i=0; i < priv->count; i++) {
> > > + ssize_t this = min(priv->iv[i].iov_len, total);
> > > +
> > > + if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
> > > + break;
> > > +
> > > + total -= this;
> > > + len += this;
> > > + if (total <= 0)
> > > + break;
> > > + }
> > > +
> > > + if (unlikely(len == 0))
> > > + len = -EFAULT;
> >
> > This is still wrong, isn't it? Or am I looking at the same patch?
> >
> > There's no way in which `total' can go negative, so it'd be nicer to just
> > test it for equality with zero. Because if it goes unexpectedly negative,
> > we _want_ the kernel to malfunction, rather than mysteriously covering
> > things up.
> >
> > The final test there should be
> >
> > if (unlikely(total != 0))
> >
> > yes?
>
> No. The original check is correct - we want to return EFAULT if
> copy_to_user() failed and we haven't copied anything so far.
> If we copied anything so far, we should return, that many bytes.
> (like short-io).

oic. And we're sure that we cannot call into this code if someone's trying
a zero-sized read?

Either way, the below (which is faster!) will fix, yes?

--- 25/drivers/usb/gadget/inode.c~vectorize-aio_read-aio_write-methods-fix Thu May 11 11:53:41 2006
+++ 25-akpm/drivers/usb/gadget/inode.c Thu May 11 13:19:45 2006
@@ -567,18 +567,18 @@ static ssize_t ep_aio_read_retry(struct
for (i = 0; i < priv->count; i++) {
ssize_t this = min(priv->iv[i].iov_len, total);

- if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this)) {
+ if (len == 0)
+ len = -EFAULT;
break;
+ }

total -= this;
len += this;
- if (total <= 0)
+ if (total == 0)
break;
}

- if (unlikely(len == 0))
- len = -EFAULT;
-
kfree(priv->buf);
kfree(priv);
aio_put_req(iocb);
_

2006-05-11 20:45:36

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods



Andrew Morton wrote:

>Badari Pulavarty <[email protected]> wrote:
>
>>On Thu, 2006-05-11 at 11:47 -0700, Andrew Morton wrote:
>>
>>>Badari Pulavarty <[email protected]> wrote:
>>>
>>>> static ssize_t ep_aio_read_retry(struct kiocb *iocb)
>>>> {
>>>> struct kiocb_priv *priv = iocb->private;
>>>>- ssize_t status = priv->actual;
>>>>+ ssize_t len, total;
>>>>
>>>> /* we "retry" to get the right mm context for this: */
>>>>- status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
>>>>- if (unlikely(0 != status))
>>>>- status = -EFAULT;
>>>>- else
>>>>- status = priv->actual;
>>>>+
>>>>+ /* copy stuff into user buffers */
>>>>+ total = priv->actual;
>>>>+ len = 0;
>>>>+ for (i=0; i < priv->count; i++) {
>>>>+ ssize_t this = min(priv->iv[i].iov_len, total);
>>>>+
>>>>+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
>>>>+ break;
>>>>+
>>>>+ total -= this;
>>>>+ len += this;
>>>>+ if (total <= 0)
>>>>+ break;
>>>>+ }
>>>>+
>>>>+ if (unlikely(len == 0))
>>>>+ len = -EFAULT;
>>>>
>>>This is still wrong, isn't it? Or am I looking at the same patch?
>>>
>>>There's no way in which `total' can go negative, so it'd be nicer to just
>>>test it for equality with zero. Because if it goes unexpectedly negative,
>>>we _want_ the kernel to malfunction, rather than mysteriously covering
>>>things up.
>>>
>>>The final test there should be
>>>
>>> if (unlikely(total != 0))
>>>
>>>yes?
>>>
>>No. The original check is correct - we want to return EFAULT if
>>copy_to_user() failed and we haven't copied anything so far.
>>If we copied anything so far, we should return, that many bytes.
>>(like short-io).
>>
>
>oic. And we're sure that we cannot call into this code if someone's trying
>a zero-sized read?
>
>Either way, the below (which is faster!) will fix, yes?
>
>--- 25/drivers/usb/gadget/inode.c~vectorize-aio_read-aio_write-methods-fix Thu May 11 11:53:41 2006
>+++ 25-akpm/drivers/usb/gadget/inode.c Thu May 11 13:19:45 2006
>@@ -567,18 +567,18 @@ static ssize_t ep_aio_read_retry(struct
> for (i = 0; i < priv->count; i++) {
> ssize_t this = min(priv->iv[i].iov_len, total);
>
>- if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this))
>+ if (copy_to_user(priv->iv[i].iov_buf, priv->buf, this)) {
>+ if (len == 0)
>+ len = -EFAULT;
> break;
>+ }
>
> total -= this;
> len += this;
>- if (total <= 0)
>+ if (total == 0)
> break;
> }
>
>- if (unlikely(len == 0))
>- len = -EFAULT;
>-
> kfree(priv->buf);
> kfree(priv);
> aio_put_req(iocb);
>_
>
Yes, this is good.

No one should call into this code with size == 0, since we should have
returned success without doing any IO in the first place.

Thanks,
Badari


2006-05-11 22:48:52

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

On Thu, 2006-05-11 at 18:16 -0400, Chuck Lever wrote:
> Badari Pulavarty wrote:
> >
> >
> > Chuck Lever wrote:
> >
> >>
> >> Noticed these four file systems still appear to invoke
> >> generic_file_read/write:
> >>
> >> 0 fs/sysv/file.c <global> 25 .write = generic_file_write,
> >> 1 fs/ufs/file.c <global> 37 .write = generic_file_write,
> >> 2 fs/smbfs/file.c smb_file_write 341 result = generic_file_write(file,
> >> buf, count, ppos);
> >> 3 fs/udf/file.c udf_file_write 139 retval = generic_file_write(file,
> >> buf, count, ppos);
> >
> >
> > Hmm ? My 4th patch would get rid of generic_file_read() and
> > generic_file_write()
> > and all its users. (basically converted to do_sync_read/write).
> >
> > Where do you see these, after applying all 4 patches ? I can't see them
> > in my tree.
>
> Yes, I applied all 4 of the patches you mailed out today to the latest
> 2.6.17-rc3 git tree. Could be my mistake... but when I try to build the
> kernel with UDF enabled, the build fails because it can't find
> generic_file_read.
>

Hi Andrew,

Somehow I missed updating a few filesystems. Here is the patch to fix
them.

Thanks,
Badari

A few filesystems were missed and still use the generic_file_read
and generic_file_write interfaces; convert them.

Signed-off-by: Badari Pulavarty <[email protected]>

diff -Naurp -X /usr/src/dontdiff linux-2.6.17-rc3/fs/smbfs/file.c linux-2.6.17-rc3.save/fs/smbfs/file.c
--- linux-2.6.17-rc3/fs/smbfs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/smbfs/file.c 2006-05-09 15:30:32.000000000 -0700
@@ -233,7 +233,7 @@ smb_file_read(struct file * file, char _
(long)dentry->d_inode->i_size,
dentry->d_inode->i_flags, dentry->d_inode->i_atime);

- status = generic_file_read(file, buf, count, ppos);
+ status = do_sync_read(file, buf, count, ppos);
out:
return status;
}
@@ -338,7 +338,7 @@ smb_file_write(struct file *file, const
goto out;

if (count > 0) {
- result = generic_file_write(file, buf, count, ppos);
+ result = do_sync_write(file, buf, count, ppos);
VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
(long) file->f_pos, (long) dentry->d_inode->i_size,
dentry->d_inode->i_mtime, dentry->d_inode->i_atime);
diff -Naurp -X /usr/src/dontdiff linux-2.6.17-rc3/fs/sysv/file.c linux-2.6.17-rc3.save/fs/sysv/file.c
--- linux-2.6.17-rc3/fs/sysv/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/sysv/file.c 2006-05-09 15:25:00.000000000 -0700
@@ -21,8 +21,10 @@
*/
const struct file_operations sysv_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = sysv_sync_file,
.sendfile = generic_file_sendfile,
diff -Naurp -X /usr/src/dontdiff linux-2.6.17-rc3/fs/udf/file.c linux-2.6.17-rc3.save/fs/udf/file.c
--- linux-2.6.17-rc3/fs/udf/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/udf/file.c 2006-05-09 15:27:28.000000000 -0700
@@ -136,7 +136,7 @@ static ssize_t udf_file_write(struct fil
}
}

- retval = generic_file_write(file, buf, count, ppos);
+ retval = do_sync_write(file, buf, count, ppos);

if (retval > 0)
mark_inode_dirty(inode);
@@ -249,11 +249,13 @@ static int udf_release_file(struct inode
}

const struct file_operations udf_file_operations = {
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.ioctl = udf_ioctl,
.open = generic_file_open,
.mmap = generic_file_mmap,
.write = udf_file_write,
+ .aio_write = generic_file_aio_write,
.release = udf_release_file,
.fsync = udf_fsync_file,
.sendfile = generic_file_sendfile,
diff -Naurp -X /usr/src/dontdiff linux-2.6.17-rc3/fs/ufs/file.c linux-2.6.17-rc3.save/fs/ufs/file.c
--- linux-2.6.17-rc3/fs/ufs/file.c 2006-04-26 19:19:25.000000000 -0700
+++ linux-2.6.17-rc3.save/fs/ufs/file.c 2006-05-09 15:28:16.000000000 -0700
@@ -33,8 +33,10 @@

const struct file_operations ufs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = generic_file_open,
.sendfile = generic_file_sendfile,



2006-05-12 07:39:01

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

On Thu, May 11, 2006 at 03:50:03PM -0700, Badari Pulavarty wrote:
> diff -Naurp -X /usr/src/dontdiff linux-2.6.17-rc3/fs/smbfs/file.c linux-2.6.17-rc3.save/fs/smbfs/file.c
> --- linux-2.6.17-rc3/fs/smbfs/file.c 2006-04-26 19:19:25.000000000 -0700
> +++ linux-2.6.17-rc3.save/fs/smbfs/file.c 2006-05-09 15:30:32.000000000 -0700
> @@ -233,7 +233,7 @@ smb_file_read(struct file * file, char _
> (long)dentry->d_inode->i_size,
> dentry->d_inode->i_flags, dentry->d_inode->i_atime);
>
> - status = generic_file_read(file, buf, count, ppos);
> + status = do_sync_read(file, buf, count, ppos);
> out:
> return status;
> }

This looks wrong. The additional work in smb_file_read/smb_file_write
needs to be done in smb_file_aio_read/smb_file_aio_write, and .read/.write
can be set to do_sync_read/do_sync_write directly.

> diff -Naurp -X /usr/src/dontdiff linux-2.6.17-rc3/fs/udf/file.c linux-2.6.17-rc3.save/fs/udf/file.c
> --- linux-2.6.17-rc3/fs/udf/file.c 2006-04-26 19:19:25.000000000 -0700
> +++ linux-2.6.17-rc3.save/fs/udf/file.c 2006-05-09 15:27:28.000000000 -0700
> @@ -136,7 +136,7 @@ static ssize_t udf_file_write(struct fil
> }
> }
>
> - retval = generic_file_write(file, buf, count, ppos);
> + retval = do_sync_write(file, buf, count, ppos);

ditto. also IIRC this only happens for the udf write path.

2006-05-12 10:06:18

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

Badari Pulavarty <[email protected]> wrote:
>
> drivers/usb/gadget/inode.c | 71 +++++++++++++++++++++++++++-----------

The changes in this file don't even approximately vaguely have the
remotest chance of compiling. Please send fix.

2006-05-12 10:12:06

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods

Andrew Morton <[email protected]> wrote:
>
> Please send fix.

On second thoughts, I'll drop them all. Too many fixups, this code needs
more work.

Please ensure that the next version passes allmodconfig without adding any
new warnings on both 32-bit and 64-bit compilers, thanks.

2006-05-12 13:56:09

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 1/4] Vectorize aio_read/aio_write methods



Andrew Morton wrote:

>Andrew Morton <[email protected]> wrote:
>
>>Please send fix.
>>
>
>On second thoughts, I'll drop them all. Too many fixups, this code needs
>more work.
>
>Please ensure that the next version passes allmodconfig without adding any
>new warnings on both 32-bit and 64-bit compilers, thanks.
>
Will do. I have been building and testing on 64-bit machines (amd64, ppc64).

Thanks,
Badari

>


2006-05-15 21:18:14

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 0/4] VFS fileop cleanups by collapsing AIO and vector IO

Hi Andrew,

This series of patches cleans up and streamlines the generic_file_*
interfaces in filemap.c. This time, to avoid public humiliation,
I compiled (allmodconfig) the patchset on 3 different architectures
(i386, x86_64, ppc64) with 4 different compiler versions and made
sure this patchset didn't introduce any new errors or warnings :)

This patchset is against 2.6.17-rc4, so it won't apply cleanly on
-mm (a few minor fixes in ocfs2, nfs & jffs2 are needed). If you want me
to send a patchset against 2.6.17-rc4-mm1, please let me know.

Note:

1. I couldn't reproduce the compiler warning you got:

fs/aio.c: In function `aio_advance_iovec':
fs/aio.c:1314: warning: comparison of distinct pointer types lacks a cast

So, I didn't fix this.

=====

The first three patches collapse all the vectored IO support into
a single set of file-operation methods using aio_read/aio_write.
This work was originally suggested & started by Christoph Hellwig,
when Zach Brown tried to add vectored support for AIO.

Patch 4 sets all the filesystems' .read/.write/.aio_read/.aio_write
methods correctly, allowing us to clean up most of the
generic_file_*_read/write interfaces in filemap.c.

After this patch set, we should end up with ONLY the following
read/write (exported) interfaces in filemap.c:

generic_file_aio_read() - read handler
generic_file_aio_write() - write handler
generic_file_aio_write_nolock() - no lock write handler

Here is the summary:

[PATCH 1/4] Vectorize aio_read/aio_write methods

[PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write
instead.

[PATCH 3/4] Core aio changes to support vectored AIO.

[PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

BTW, Chuck Lever is actually re-arranging NFS DIO, AIO code to
fit into this model.

Thanks to Chuck Lever, Shaggy, Christoph, Zach Brown, Ben LaHaise
for helping out.

Thanks,
Badari



2006-05-15 21:20:19

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 1/4] Vectorize aio_read/aio_write methods

This patch vectorizes aio_read() and aio_write() methods to prepare
for collapsing all aio & vectored operations into one interface -
which is aio_read()/aio_write().

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Chuck Lever <[email protected]>

Documentation/filesystems/Locking | 5 +-
Documentation/filesystems/vfs.txt | 4 +-
drivers/char/raw.c | 14 -------
drivers/usb/gadget/inode.c | 74 ++++++++++++++++++++++++++++----------
fs/aio.c | 15 +++++--
fs/block_dev.c | 10 -----
fs/cifs/cifsfs.c | 6 +--
fs/ext3/file.c | 5 +-
fs/nfs/direct.c | 26 ++++++++++---
fs/nfs/file.c | 39 ++++++++++----------
fs/ntfs/file.c | 8 +---
fs/ocfs2/file.c | 28 ++++++--------
fs/read_write.c | 20 ++++++++--
fs/reiserfs/file.c | 12 +-----
fs/xfs/linux-2.6/xfs_file.c | 44 ++++++++++------------
include/linux/aio.h | 2 +
include/linux/fs.h | 10 ++---
include/linux/nfs_fs.h | 8 ++--
include/net/sock.h | 1
mm/filemap.c | 38 +++++++++----------
net/socket.c | 48 ++++++++++++------------
21 files changed, 227 insertions(+), 190 deletions(-)

Index: linux-2.6.17-rc4/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.17-rc4.orig/Documentation/filesystems/Locking 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/Documentation/filesystems/Locking 2006-05-12 09:39:48.000000000 -0700
@@ -355,10 +355,9 @@ The last two are called only from check_
prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
- loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int,
Index: linux-2.6.17-rc4/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.17-rc4.orig/Documentation/filesystems/vfs.txt 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/Documentation/filesystems/vfs.txt 2006-05-15 14:13:52.852797344 -0700
@@ -699,9 +699,9 @@ This describes how the VFS can manipulat
struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
Index: linux-2.6.17-rc4/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/char/raw.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/drivers/char/raw.c 2006-05-15 14:13:52.852797344 -0700
@@ -250,23 +250,11 @@ static ssize_t raw_file_write(struct fil
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
-
static struct file_operations raw_fops = {
.read = generic_file_read,
.aio_read = generic_file_aio_read,
.write = raw_file_write,
- .aio_write = raw_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
Index: linux-2.6.17-rc4/fs/aio.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/aio.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/aio.c 2006-05-15 14:09:44.399567976 -0700
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/uio.h>

#define DEBUG 0

@@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *i
ssize_t ret = 0;

do {
- ret = file->f_op->aio_read(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
/*
* Can't just depend on iocb->ki_left to determine
* whether we are done. This may have been a short read.
@@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *
ssize_t ret = 0;

do {
- ret = file->f_op->aio_write(iocb, iocb->ki_buf,
- iocb->ki_left, iocb->ki_pos);
+ iocb->ki_inline_vec.iov_base = iocb->ki_buf;
+ iocb->ki_inline_vec.iov_len = iocb->ki_left;
+
+ ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
+ 1, iocb->ki_pos);
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
Index: linux-2.6.17-rc4/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/block_dev.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/block_dev.c 2006-05-15 14:13:52.848797952 -0700
@@ -1064,14 +1064,6 @@ static ssize_t blkdev_file_write(struct
return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1094,7 +1086,7 @@ const struct file_operations def_blk_fop
.read = generic_file_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
- .aio_write = blkdev_file_aio_write,
+ .aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
Index: linux-2.6.17-rc4/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/cifs/cifsfs.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/cifs/cifsfs.c 2006-05-15 14:13:52.846798256 -0700
@@ -496,13 +496,13 @@ static ssize_t cifs_file_writev(struct f
return written;
}

-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
ssize_t written;

- written = generic_file_aio_write(iocb, buf, count, pos);
+ written = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (!CIFS_I(inode)->clientCanCacheAll)
filemap_fdatawrite(inode->i_mapping);
return written;
Index: linux-2.6.17-rc4/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ext3/file.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/ext3/file.c 2006-05-15 14:13:52.847798104 -0700
@@ -48,14 +48,15 @@ static int ext3_release_file (struct ino
}

static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret;
int err;

- ret = generic_file_aio_write(iocb, buf, count, pos);
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

/*
* Skip flushing if there was an error, or if nothing was written.
Index: linux-2.6.17-rc4/fs/read_write.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/read_write.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/read_write.c 2006-05-15 14:13:52.849797800 -0700
@@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(str

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
@@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read);

ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
+ struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;

init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- while (-EIOCBRETRY ==
- (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+ kiocb.ki_left = len;
+
+ for (;;) {
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
wait_on_retry_sync_kiocb(&kiocb);
+ }

if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
Index: linux-2.6.17-rc4/fs/reiserfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/reiserfs/file.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/reiserfs/file.c 2006-05-12 09:39:48.000000000 -0700
@@ -1329,7 +1329,7 @@ static ssize_t reiserfs_file_write(struc
if (err)
return err;
}
- result = generic_file_write(file, buf, count, ppos);
+ result = do_sync_write(file, buf, count, ppos);

if (after_file_end) { /* Now update i_size and remove the savelink */
struct reiserfs_transaction_handle th;
@@ -1560,14 +1560,8 @@ static ssize_t reiserfs_file_write(struc
return res;
}

-static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
- size_t count, loff_t pos)
-{
- return generic_file_aio_write(iocb, buf, count, pos);
-}
-
const struct file_operations reiserfs_file_operations = {
- .read = generic_file_read,
+ .read = do_sync_read,
.write = reiserfs_file_write,
.ioctl = reiserfs_ioctl,
.mmap = generic_file_mmap,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_fi
.fsync = reiserfs_sync_file,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
- .aio_write = reiserfs_aio_write,
+ .aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
Index: linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/xfs/linux-2.6/xfs_file.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_file.c 2006-05-15 14:13:52.846798256 -0700
@@ -51,12 +51,11 @@ static struct vm_operations_struct xfs_d
STATIC inline ssize_t
__xfs_file_read(
struct kiocb *iocb,
- char __user *buf,
+ const struct iovec *iov,
+ unsigned long nr_segs,
int ioflags,
- size_t count,
loff_t pos)
{
- struct iovec iov = {buf, count};
struct file *file = iocb->ki_filp;
vnode_t *vp = vn_from_inode(file->f_dentry->d_inode);
ssize_t rval;
@@ -65,39 +64,38 @@ __xfs_file_read(

if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
- VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_READ(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_read_invis(
struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
__xfs_file_write(
- struct kiocb *iocb,
- const char __user *buf,
- int ioflags,
- size_t count,
- loff_t pos)
+ struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ int ioflags,
+ loff_t pos)
{
- struct iovec iov = {(void __user *)buf, count};
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
vnode_t *vp = vn_from_inode(inode);
@@ -107,28 +105,28 @@ __xfs_file_write(
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;

- VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ VOP_WRITE(vp, iocb, iov, nr_segs, &iocb->ki_pos, ioflags, NULL, rval);
return rval;
}

STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
}

STATIC ssize_t
xfs_file_aio_write_invis(
struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+ return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

STATIC inline ssize_t
Index: linux-2.6.17-rc4/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/fs.h 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/include/linux/fs.h 2006-05-15 14:13:52.853797192 -0700
@@ -1015,9 +1015,9 @@ struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
@@ -1594,11 +1594,11 @@ extern int file_send_actor(read_descript
extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
+extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
- unsigned long, loff_t *);
+ unsigned long, loff_t);
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
Index: linux-2.6.17-rc4/include/net/sock.h
===================================================================
--- linux-2.6.17-rc4.orig/include/net/sock.h 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/include/net/sock.h 2006-05-12 09:39:48.000000000 -0700
@@ -659,7 +659,6 @@ struct sock_iocb {
struct sock *sk;
struct scm_cookie *scm;
struct msghdr *msg, async_msg;
- struct iovec async_iov;
struct kiocb *kiocb;
};

Index: linux-2.6.17-rc4/mm/filemap.c
===================================================================
--- linux-2.6.17-rc4.orig/mm/filemap.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/mm/filemap.c 2006-05-15 14:13:52.850797648 -0700
@@ -1128,14 +1128,12 @@ out:
EXPORT_SYMBOL(__generic_file_aio_read);

ssize_t
-generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
-
BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+ return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
}
-
EXPORT_SYMBOL(generic_file_aio_read);

ssize_t
@@ -2195,22 +2193,21 @@ out:
current->backing_dev_info = NULL;
return written ? written : err;
}
-EXPORT_SYMBOL(generic_file_aio_write_nolock);

-ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- loff_t pos = *ppos;

- ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
+ ssize_t err;

err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
@@ -2218,6 +2215,7 @@ generic_file_aio_write_nolock(struct kio
}
return ret;
}
+EXPORT_SYMBOL(generic_file_aio_write_nolock);

static ssize_t
__generic_file_write_nolock(struct file *file, const struct iovec *iov,
@@ -2227,8 +2225,9 @@ __generic_file_write_nolock(struct file
ssize_t ret;

init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (ret == -EIOCBQUEUED)
+ if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
return ret;
}
@@ -2241,28 +2240,27 @@ generic_file_write_nolock(struct file *f
ssize_t ret;

init_sync_kiocb(&kiocb, file);
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+ kiocb.ki_pos = *ppos;
+ ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
return ret;
}
EXPORT_SYMBOL(generic_file_write_nolock);

-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
- &iocb->ki_pos);
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);

if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
Index: linux-2.6.17-rc4/net/socket.c
===================================================================
--- linux-2.6.17-rc4.orig/net/socket.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/net/socket.c 2006-05-15 14:13:52.850797648 -0700
@@ -96,10 +96,10 @@
#include <linux/netfilter.h>

static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
- size_t size, loff_t pos);
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t size, loff_t pos);
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct * vma);

static int sock_close(struct inode *inode, struct file *file);
@@ -702,7 +702,7 @@ static ssize_t sock_sendpage(struct file
}

static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
- char __user *ubuf, size_t size, struct sock_iocb *siocb)
+ struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -712,15 +712,13 @@ static struct sock_iocb *alloc_sock_iocb
}

siocb->kiocb = iocb;
- siocb->async_iov.iov_base = ubuf;
- siocb->async_iov.iov_len = size;
-
iocb->private = siocb;
return siocb;
}

static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -751,31 +749,33 @@ static ssize_t sock_readv(struct file *f
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
+
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}

static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
- struct file *file, struct iovec *iov, unsigned long nr_segs)
+ struct file *file, const struct iovec *iov,
+ unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
@@ -808,28 +808,28 @@ static ssize_t sock_writev(struct file *
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;

- ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
+ ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}

-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
- size_t count, loff_t pos)
+static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;

if (pos != 0)
return -ESPIPE;
- if (count == 0) /* Match SYS5 behaviour */
+
+ if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;

- x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
+ x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;

- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
- &x->async_iov, 1);
+ return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}


Index: linux-2.6.17-rc4/drivers/usb/gadget/inode.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/usb/gadget/inode.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/drivers/usb/gadget/inode.c 2006-05-15 13:57:02.115452776 -0700
@@ -528,7 +528,8 @@ struct kiocb_priv {
struct usb_request *req;
struct ep_data *epdata;
void *buf;
- char __user *ubuf;
+ const struct iovec *iv;
+ unsigned long nr_segs;
unsigned actual;
};

@@ -556,18 +557,33 @@ static int ep_aio_cancel(struct kiocb *i
static ssize_t ep_aio_read_retry(struct kiocb *iocb)
{
struct kiocb_priv *priv = iocb->private;
- ssize_t status = priv->actual;
+ ssize_t len, total;
+ int i;

/* we "retry" to get the right mm context for this: */
- status = copy_to_user(priv->ubuf, priv->buf, priv->actual);
- if (unlikely(0 != status))
- status = -EFAULT;
- else
- status = priv->actual;
+
+ /* copy stuff into user buffers */
+ total = priv->actual;
+ len = 0;
+ for (i=0; i < priv->nr_segs; i++) {
+ ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total);
+
+ if (copy_to_user(priv->iv[i].iov_base, priv->buf, this)) {
+ if (len == 0)
+ len = -EFAULT;
+ break;
+ }
+
+ total -= this;
+ len += this;
+ if (total == 0)
+ break;
+ }
+
kfree(priv->buf);
kfree(priv);
aio_put_req(iocb);
- return status;
+ return len;
}

static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -615,7 +631,8 @@ ep_aio_rwtail(
char *buf,
size_t len,
struct ep_data *epdata,
- char __user *ubuf
+ const struct iovec *iv,
+ unsigned long nr_segs
)
{
struct kiocb_priv *priv = (void *) &iocb->private;
@@ -630,7 +647,8 @@ fail:
return value;
}
iocb->private = priv;
- priv->ubuf = ubuf;
+ priv->iv = iv;
+ priv->nr_segs = nr_segs;

value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
if (unlikely(value < 0)) {
@@ -675,36 +693,54 @@ fail:
}

static ssize_t
-ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o)
+ep_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;

if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ if (iocb->ki_left == 0)
+ return 0;
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
+
iocb->ki_retry = ep_aio_read_retry;
- return ep_aio_rwtail(iocb, buf, len, epdata, ubuf);
+ return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs);
}

static ssize_t
-ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o)
+ep_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t o)
{
struct ep_data *epdata = iocb->ki_filp->private_data;
char *buf;
+ size_t len = 0;
+ int i = 0;

if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN)))
return -EINVAL;
- buf = kmalloc(len, GFP_KERNEL);
+
+ if (iocb->ki_left == 0)
+ return 0;
+
+ buf = kmalloc(iocb->ki_left, GFP_KERNEL);
if (unlikely(!buf))
return -ENOMEM;
- if (unlikely(copy_from_user(buf, ubuf, len) != 0)) {
- kfree(buf);
- return -EFAULT;
+
+ for (i=0; i < nr_segs; i++) {
+ if (unlikely(copy_from_user(&buf[len], iov[i].iov_base,
+ iov[i].iov_len) != 0)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+ len += iov[i].iov_len;
}
- return ep_aio_rwtail(iocb, buf, len, epdata, NULL);
+ return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
}

/*----------------------------------------------------------------------*/
Index: linux-2.6.17-rc4/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/aio.h 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/include/linux/aio.h 2006-05-15 14:09:44.400567824 -0700
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/aio_abi.h>
+#include <linux/uio.h>

#include <asm/atomic.h>

@@ -112,6 +113,7 @@ struct kiocb {
long ki_retried; /* just for testing */
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
+ struct iovec ki_inline_vec; /* inline vector */

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc4/fs/nfs/direct.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/nfs/direct.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/nfs/direct.c 2006-05-12 09:39:48.000000000 -0700
@@ -745,8 +745,8 @@ static ssize_t nfs_direct_write(struct k
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * @count: number of bytes to read
+ * @iov: vector of user buffers into which to read data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
@@ -763,19 +763,26 @@ static ssize_t nfs_direct_write(struct k
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
ssize_t retval = -EINVAL;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
if (count < 0)
goto out;
retval = -EFAULT;
@@ -807,8 +814,8 @@ out:
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * @count: number of bytes to write
+ * @iov: vector of user buffers from which to write data
+ * @nr_segs: size of iov vector
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -829,19 +836,26 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
ssize_t retval;
int page_count;
struct page **pages;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+ const char __user *buf = iov[0].iov_base;
+ size_t count = iov[0].iov_len;

dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);

+ if (nr_segs != 1)
+ return -EINVAL;
+
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
Index: linux-2.6.17-rc4/fs/nfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/nfs/file.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/nfs/file.c 2006-05-12 10:24:59.000000000 -0700
@@ -41,8 +41,10 @@ static int nfs_file_release(struct inode
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
-static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
-static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
@@ -53,8 +55,8 @@ const struct file_operations nfs_file_op
.llseek = nfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -212,26 +214,27 @@ nfs_file_flush(struct file *file)
return status;
}

-static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+static ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ size_t count = iov_length(iov, nr_segs);

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, buf, count, pos);
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+ dfprintk(VFS, "nfs: read(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long) count, (long long) pos);

result = nfs_revalidate_file(inode, iocb->ki_filp);
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
- result = generic_file_aio_read(iocb, buf, count, pos);
+ result = generic_file_aio_read(iocb, iov, nr_segs, pos);
return result;
}

@@ -343,24 +346,22 @@ struct address_space_operations nfs_file
#endif
};

-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ size_t count = iov_length(iov, nr_segs);

#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, buf, count, pos);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

- dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+ dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- inode->i_ino, (unsigned long) count, (unsigned long) pos);
+ inode->i_ino, (unsigned long) count, (long long) pos);

result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -380,7 +381,7 @@ nfs_file_write(struct kiocb *iocb, const
goto out;

nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
- result = generic_file_aio_write(iocb, buf, count, pos);
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
out:
return result;

Index: linux-2.6.17-rc4/include/linux/nfs_fs.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/nfs_fs.h 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/include/linux/nfs_fs.h 2006-05-12 09:39:48.000000000 -0700
@@ -359,10 +359,10 @@ extern int nfs3_removexattr (struct dent
*/
extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
- size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos);
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);

/*
* linux/fs/nfs/dir.c
Index: linux-2.6.17-rc4/fs/ocfs2/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ocfs2/file.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/ocfs2/file.c 2006-05-12 09:39:48.000000000 -0700
@@ -929,25 +929,23 @@ static inline int ocfs2_write_should_rem
}

static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
- const char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0;
u32 clusters;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
loff_t newsize, saved_pos;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

/* happy write of zero bytes */
- if (count == 0)
+ if (iocb->ki_left == 0)
return 0;

if (!inode) {
@@ -1016,7 +1014,7 @@ static ssize_t ocfs2_file_aio_write(stru
} else {
saved_pos = iocb->ki_pos;
}
- newsize = count + saved_pos;
+ newsize = iocb->ki_left + saved_pos;

mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
(long long) saved_pos, (long long) newsize,
@@ -1059,7 +1057,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* Fill any holes which would've been created by this
* write. If we're O_APPEND, this will wind up
* (correctly) being a noop. */
- ret = ocfs2_zero_extend(inode, (u64) newsize - count);
+ ret = ocfs2_zero_extend(inode, (u64) newsize - iocb->ki_left);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1075,7 +1073,7 @@ static ssize_t ocfs2_file_aio_write(stru
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb);

- ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);

/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -1109,16 +1107,16 @@ out:
}

static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
- char __user *buf,
- size_t count,
+ const struct iovec *iov,
+ unsigned long nr_segs,
loff_t pos)
{
int ret = 0, rw_level = -1, have_alloc_sem = 0;
struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;

- mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,
- (unsigned int)count,
+ mlog_entry("(0x%p, %u, '%.*s')\n", filp,
+ (unsigned int)nr_segs,
filp->f_dentry->d_name.len,
filp->f_dentry->d_name.name);

@@ -1146,7 +1144,7 @@ static ssize_t ocfs2_file_aio_read(struc
ocfs2_iocb_set_rw_locked(iocb);
}

- ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
+ ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
if (ret == -EINVAL)
mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");

Index: linux-2.6.17-rc4/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ntfs/file.c 2006-05-11 16:31:53.000000000 -0700
+++ linux-2.6.17-rc4/fs/ntfs/file.c 2006-05-15 14:13:52.848797952 -0700
@@ -2174,20 +2174,18 @@ out:
/**
* ntfs_file_aio_write -
*/
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
- size_t count, loff_t pos)
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };

BUG_ON(iocb->ki_pos != pos);

mutex_lock(&inode->i_mutex);
- ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+ ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = sync_page_range(inode, mapping, pos, ret);


2006-05-15 21:21:52

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 3/4] Core aio changes to support vectored AIO

This work was initially done by Zach Brown to add support for
vectored aio. These are the core changes for AIO to support
IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.

I made a few extra changes beyond Zach's work:
- took out aio_pread/aio_pwrite and made them
a special case of the vectored support
- added a single inlined vector to save a kmalloc()
for a simple aio_read/aio_write

Signed-off-by: Zach Brown <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Badari Pulavarty <[email protected]>
Acked-by: Benjamin LaHaise <[email protected]>

fs/aio.c | 165 +++++++++++++++++++++++++++++++++---------------
fs/read_write.c | 127 +++++++++++++++++++++---------------
include/linux/aio.h | 4 +
include/linux/aio_abi.h | 2
include/linux/fs.h | 5 +
5 files changed, 199 insertions(+), 104 deletions(-)

Index: linux-2.6.17-rc4/fs/aio.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/aio.c 2006-05-15 14:09:44.399567976 -0700
+++ linux-2.6.17-rc4/fs/aio.c 2006-05-15 14:14:02.691301664 -0700
@@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_
req->ki_retry = NULL;
req->ki_dtor = NULL;
req->private = NULL;
+ req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);

/* Check if the completion queue has enough free space to
@@ -460,6 +461,8 @@ static inline void really_put_req(struct

if (req->ki_dtor)
req->ki_dtor(req);
+ if (req->ki_iovec != &req->ki_inline_vec)
+ kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
ctx->reqs_active--;

@@ -1301,69 +1304,63 @@ asmlinkage long sys_io_destroy(aio_conte
return -EINVAL;
}

-/*
- * aio_p{read,write} are the default ki_retry methods for
- * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially
- * multiple calls to f_op->aio_read(). They loop around partial progress
- * instead of returning -EIOCBRETRY because they don't have the means to call
- * kick_iocb().
- */
-static ssize_t aio_pread(struct kiocb *iocb)
+static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret = 0;
+ struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];

- do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ BUG_ON(ret <= 0);

- ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- /*
- * Can't just depend on iocb->ki_left to determine
- * whether we are done. This may have been a short read.
- */
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
+ while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
+ ssize_t this = min((ssize_t)iov->iov_len, ret);
+ iov->iov_base += this;
+ iov->iov_len -= this;
+ iocb->ki_left -= this;
+ ret -= this;
+ if (iov->iov_len == 0) {
+ iocb->ki_cur_seg++;
+ iov++;
}
+ }

- /*
- * For pipes and sockets we return once we have some data; for
- * regular files we retry till we complete the entire read or
- * find that we can't read any more data (e.g short reads).
- */
- } while (ret > 0 && iocb->ki_left > 0 &&
- !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
-
- /* This means we must have transferred all that we could */
- /* No need to retry anymore */
- if ((ret == 0) || (iocb->ki_left == 0))
- ret = iocb->ki_nbytes - iocb->ki_left;
-
- return ret;
+ /* the caller should not have done more io than what fit in
+ * the remaining iovecs */
+ BUG_ON(ret > 0 && iocb->ki_left == 0);
}

-/* see aio_pread() */
-static ssize_t aio_pwrite(struct kiocb *iocb)
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
ssize_t ret = 0;
+ unsigned short opcode;
+
+ if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
+ (iocb->ki_opcode == IOCB_CMD_PREAD)) {
+ rw_op = file->f_op->aio_read;
+ opcode = IOCB_CMD_PREADV;
+ } else {
+ rw_op = file->f_op->aio_write;
+ opcode = IOCB_CMD_PWRITEV;
+ }

do {
- iocb->ki_inline_vec.iov_base = iocb->ki_buf;
- iocb->ki_inline_vec.iov_len = iocb->ki_left;
+ ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
+ iocb->ki_nr_segs - iocb->ki_cur_seg,
+ iocb->ki_pos);
+ if (ret > 0)
+ aio_advance_iovec(iocb, ret);

- ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
- 1, iocb->ki_pos);
- if (ret > 0) {
- iocb->ki_buf += ret;
- iocb->ki_left -= ret;
- }
- } while (ret > 0 && iocb->ki_left > 0);
+ /* retry all partial writes. retry partial reads as long as its a
+ * regular file. */
+ } while (ret > 0 && iocb->ki_left > 0 &&
+ (opcode == IOCB_CMD_PWRITEV ||
+ (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));

+ /* This means we must have transferred all that we could */
+ /* No need to retry anymore */
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;

@@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *i
return ret;
}

+static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb)
+{
+ ssize_t ret;
+
+ ret = rw_copy_check_uvector((struct iovec __user *)kiocb->ki_buf,
+ kiocb->ki_nbytes, 1,
+ &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+ if (ret < 0)
+ goto out;
+
+ kiocb->ki_nr_segs = kiocb->ki_nbytes;
+ kiocb->ki_cur_seg = 0;
+ /* ki_nbytes/left now reflect bytes instead of segs */
+ kiocb->ki_nbytes = ret;
+ kiocb->ki_left = ret;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+{
+ kiocb->ki_iovec = &kiocb->ki_inline_vec;
+ kiocb->ki_iovec->iov_base = kiocb->ki_buf;
+ kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_nr_segs = 1;
+ kiocb->ki_cur_seg = 0;
+ kiocb->ki_nbytes = kiocb->ki_left;
+ return 0;
+}
+
/*
* aio_setup_iocb:
* Performs the initial checks and aio retry method
@@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_READ);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_read)
- kiocb->ki_retry = aio_pread;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PWRITE:
ret = -EBADF;
@@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kio
ret = security_file_permission(file, MAY_WRITE);
if (unlikely(ret))
break;
+ ret = aio_setup_single_vector(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_write)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PREADV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ break;
+ ret = security_file_permission(file, MAY_READ);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
+ ret = -EINVAL;
+ if (file->f_op->aio_read)
+ kiocb->ki_retry = aio_rw_vect_retry;
+ break;
+ case IOCB_CMD_PWRITEV:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ break;
+ ret = security_file_permission(file, MAY_WRITE);
+ if (unlikely(ret))
+ break;
+ ret = aio_setup_vectored_rw(kiocb);
+ if (ret)
+ break;
ret = -EINVAL;
if (file->f_op->aio_write)
- kiocb->ki_retry = aio_pwrite;
+ kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_FDSYNC:
ret = -EINVAL;
Index: linux-2.6.17-rc4/include/linux/aio.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/aio.h 2006-05-15 14:09:44.400567824 -0700
+++ linux-2.6.17-rc4/include/linux/aio.h 2006-05-15 14:14:02.692301512 -0700
@@ -7,6 +7,7 @@
#include <linux/uio.h>

#include <asm/atomic.h>
+#include <linux/uio.h>

#define AIO_MAXSEGS 4
#define AIO_KIOGRP_NR_ATOMIC 8
@@ -114,6 +115,9 @@ struct kiocb {
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
struct iovec ki_inline_vec; /* inline vector */
+ struct iovec *ki_iovec;
+ unsigned long ki_nr_segs;
+ unsigned long ki_cur_seg;

struct list_head ki_list; /* the aio core uses this
* for cancellation */
Index: linux-2.6.17-rc4/include/linux/aio_abi.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/aio_abi.h 2006-05-15 14:09:44.400567824 -0700
+++ linux-2.6.17-rc4/include/linux/aio_abi.h 2006-05-15 14:14:02.692301512 -0700
@@ -41,6 +41,8 @@ enum {
* IOCB_CMD_POLL = 5,
*/
IOCB_CMD_NOOP = 6,
+ IOCB_CMD_PREADV = 7,
+ IOCB_CMD_PWRITEV = 8,
};

/* read() from /dev/aio returns these structures. */
Index: linux-2.6.17-rc4/fs/read_write.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/read_write.c 2006-05-15 14:14:00.584621928 -0700
+++ linux-2.6.17-rc4/fs/read_write.c 2006-05-15 14:14:02.693301360 -0700
@@ -507,6 +507,74 @@ ssize_t do_loop_readv_writev(struct file
return ret;
}

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer)
+ {
+ unsigned long seg;
+ ssize_t ret;
+ struct iovec *iov = fast_pointer;
+
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}." Linux has
+ * traditionally returned zero for zero segments, so...
+ */
+ if (nr_segs == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
+ if (nr_segs > UIO_MAXIOV) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_segs > fast_segs) {
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * According to the Single Unix Specification we should return EINVAL
+ * if an element length is < 0 when cast to ssize_t or if the
+ * total length would overflow the ssize_t return value of the
+ * system call.
+ */
+ ret = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
+
+ /* see if we're about to use an invalid len or if
+ * it's about to overflow ssize_t */
+ if (len < 0 || (ret + len < ret)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret += len;
+ }
+out:
+ *ret_pointer = iov;
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -518,64 +586,20 @@ static ssize_t do_readv_writev(int type,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
ssize_t ret;
- int seg;
io_fn_t fn;
iov_fn_t fnv;

- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
- ret = 0;
- if (nr_segs == 0)
+ if (!file->f_op) {
+ ret = -EINVAL;
goto out;
-
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
- ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV)
- goto out;
- if (!file->f_op)
- goto out;
- if (nr_segs > UIO_FASTIOV) {
- ret = -ENOMEM;
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
- if (!iov)
- goto out;
}
- ret = -EFAULT;
- if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector)))
- goto out;

- /*
- * Single unix specification:
- * We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t. The total length is fitting an ssize_t
- *
- * Be careful here because iov_len is a size_t not an ssize_t
- */
- tot_len = 0;
- ret = -EINVAL;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
-
- if (len < 0) /* size_t not fitting an ssize_t .. */
- goto out;
- if (unlikely(!access_ok(vrfy_dir(type), buf, len)))
- goto Efault;
- tot_len += len;
- if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */
- goto out;
- }
- if (tot_len == 0) {
- ret = 0;
+ ret = rw_copy_check_uvector(uvector, nr_segs, ARRAY_SIZE(iovstack),
+ iovstack, &iov);
+ if (ret <= 0)
goto out;
- }

+ tot_len = ret;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
@@ -608,9 +632,6 @@ out:
fsnotify_modify(file->f_dentry);
}
return ret;
-Efault:
- ret = -EFAULT;
- goto out;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
Index: linux-2.6.17-rc4/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/fs.h 2006-05-15 14:14:00.587621472 -0700
+++ linux-2.6.17-rc4/include/linux/fs.h 2006-05-15 14:14:02.695301056 -0700
@@ -1068,6 +1068,11 @@ struct inode_operations {

struct seq_file;

+ssize_t rw_copy_check_uvector(const struct iovec __user * uvector,
+ unsigned long nr_segs, unsigned long fast_segs,
+ struct iovec *fast_pointer,
+ struct iovec **ret_pointer);
+
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,


2006-05-15 21:21:05

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

This patch removes readv() and writev() methods and replaces
them with aio_read()/aio_write() methods.

Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

Documentation/filesystems/vfs.txt | 12 +---
drivers/char/raw.c | 2
drivers/net/tun.c | 35 +++----------
fs/bad_inode.c | 2
fs/block_dev.c | 2
fs/cifs/cifsfs.c | 16 ------
fs/compat.c | 44 ++++------------
fs/ext2/file.c | 2
fs/ext3/file.c | 2
fs/fat/file.c | 2
fs/fuse/dev.c | 35 +++----------
fs/hostfs/hostfs_kern.c | 2
fs/jfs/file.c | 2
fs/ntfs/file.c | 2
fs/pipe.c | 51 +++++--------------
fs/read_write.c | 101 +++++++++++++++++++++++++-------------
fs/read_write.h | 14 +++++
fs/xfs/linux-2.6/xfs_file.c | 92 ----------------------------------
include/linux/fs.h | 6 --
mm/filemap.c | 36 -------------
net/socket.c | 40 ---------------
sound/core/pcm_native.c | 40 +++++++--------
22 files changed, 146 insertions(+), 394 deletions(-)

Index: linux-2.6.17-rc4/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/char/raw.c 2006-05-15 13:56:19.717898184 -0700
+++ linux-2.6.17-rc4/drivers/char/raw.c 2006-05-15 14:09:43.037775000 -0700
@@ -258,8 +258,6 @@ static struct file_operations raw_fops =
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.owner = THIS_MODULE,
};

Index: linux-2.6.17-rc4/drivers/net/tun.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/net/tun.c 2006-05-15 13:56:19.717898184 -0700
+++ linux-2.6.17-rc4/drivers/net/tun.c 2006-05-15 13:57:10.996102712 -0700
@@ -289,11 +289,10 @@ static inline size_t iov_total(const str
return len;
}

-/* Writev */
-static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
- struct tun_struct *tun = file->private_data;
+ struct tun_struct *tun = iocb->ki_filp->private_data;

if (!tun)
return -EBADFD;
@@ -303,14 +302,6 @@ static ssize_t tun_chr_writev(struct fil
return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}

-/* Write */
-static ssize_t tun_chr_write(struct file * file, const char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { (void __user *) buf, count };
- return tun_chr_writev(file, &iv, 1, pos);
-}
-
/* Put packet to the user space buffer */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
struct sk_buff *skb,
@@ -344,10 +335,10 @@ static __inline__ ssize_t tun_put_user(s
return total;
}

-/* Readv */
-static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
+ struct file *file = iocb->ki_filp;
struct tun_struct *tun = file->private_data;
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
@@ -427,14 +418,6 @@ static ssize_t tun_chr_readv(struct file
return ret;
}

-/* Read */
-static ssize_t tun_chr_read(struct file * file, char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { buf, count };
- return tun_chr_readv(file, &iv, 1, pos);
-}
-
static void tun_setup(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -762,10 +745,8 @@ static int tun_chr_close(struct inode *i
static struct file_operations tun_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = tun_chr_read,
- .readv = tun_chr_readv,
- .write = tun_chr_write,
- .writev = tun_chr_writev,
+ .aio_read = tun_chr_aio_read,
+ .aio_write = tun_chr_aio_write,
.poll = tun_chr_poll,
.ioctl = tun_chr_ioctl,
.open = tun_chr_open,
Index: linux-2.6.17-rc4/fs/bad_inode.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/bad_inode.c 2006-05-15 13:56:19.701900616 -0700
+++ linux-2.6.17-rc4/fs/bad_inode.c 2006-05-15 13:57:10.996102712 -0700
@@ -40,8 +40,6 @@ static const struct file_operations bad_
.aio_fsync = EIO_ERROR,
.fasync = EIO_ERROR,
.lock = EIO_ERROR,
- .readv = EIO_ERROR,
- .writev = EIO_ERROR,
.sendfile = EIO_ERROR,
.sendpage = EIO_ERROR,
.get_unmapped_area = EIO_ERROR,
Index: linux-2.6.17-rc4/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/block_dev.c 2006-05-15 13:56:19.700900768 -0700
+++ linux-2.6.17-rc4/fs/block_dev.c 2006-05-15 14:09:43.034775456 -0700
@@ -1093,8 +1093,6 @@ const struct file_operations def_blk_fop
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .readv = generic_file_readv,
- .writev = generic_file_write_nolock,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc4/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/cifs/cifsfs.c 2006-05-15 13:56:19.698901072 -0700
+++ linux-2.6.17-rc4/fs/cifs/cifsfs.c 2006-05-15 13:57:10.998102408 -0700
@@ -484,18 +484,6 @@ cifs_get_sb(struct file_system_type *fs_
return sb;
}

-static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct inode *inode = file->f_dentry->d_inode;
- ssize_t written;
-
- written = generic_file_writev(file, iov, nr_segs, ppos);
- if (!CIFS_I(inode)->clientCanCacheAll)
- filemap_fdatawrite(inode->i_mapping);
- return written;
-}
-
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -581,8 +569,6 @@ struct inode_operations cifs_symlink_ino
const struct file_operations cifs_file_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
@@ -624,8 +610,6 @@ const struct file_operations cifs_file_d
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
Index: linux-2.6.17-rc4/fs/compat.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/compat.c 2006-05-15 13:56:19.701900616 -0700
+++ linux-2.6.17-rc4/fs/compat.c 2006-05-15 13:57:11.000102104 -0700
@@ -55,6 +55,8 @@

extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);

+#include "read_write.h"
+
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
@@ -1139,9 +1141,6 @@ static ssize_t compat_do_readv_writev(in
const struct compat_iovec __user *uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov=iovstack, *vector;
@@ -1224,39 +1223,18 @@ static ssize_t compat_do_readv_writev(in
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -1284,7 +1262,7 @@ compat_sys_readv(unsigned long fd, const
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
goto out;

ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
@@ -1307,7 +1285,7 @@ compat_sys_writev(unsigned long fd, cons
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
goto out;

ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
Index: linux-2.6.17-rc4/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ext2/file.c 2006-05-15 13:56:19.699900920 -0700
+++ linux-2.6.17-rc4/fs/ext2/file.c 2006-05-15 14:09:43.031775912 -0700
@@ -50,8 +50,6 @@ const struct file_operations ext2_file_o
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc4/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ext3/file.c 2006-05-15 13:56:19.699900920 -0700
+++ linux-2.6.17-rc4/fs/ext3/file.c 2006-05-15 13:57:11.001101952 -0700
@@ -112,8 +112,6 @@ const struct file_operations ext3_file_o
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.ioctl = ext3_ioctl,
.mmap = generic_file_mmap,
.open = generic_file_open,
Index: linux-2.6.17-rc4/fs/fat/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/fat/file.c 2006-05-15 13:56:19.697901224 -0700
+++ linux-2.6.17-rc4/fs/fat/file.c 2006-05-15 13:57:11.002101800 -0700
@@ -116,8 +116,6 @@ const struct file_operations fat_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc4/fs/fuse/dev.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/fuse/dev.c 2006-05-15 13:56:19.700900768 -0700
+++ linux-2.6.17-rc4/fs/fuse/dev.c 2006-05-15 13:57:11.003101648 -0700
@@ -585,14 +585,15 @@ static void request_wait(struct fuse_con
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_copy_state cs;
unsigned reqsize;
+ struct file *file = iocb->ki_filp;
struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
return -EPERM;
@@ -658,15 +659,6 @@ static ssize_t fuse_dev_readv(struct fil
return err;
}

-static ssize_t fuse_dev_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = buf;
- return fuse_dev_readv(file, &iov, 1, off);
-}
-
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
@@ -711,15 +703,15 @@ static int copy_out_args(struct fuse_cop
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
unsigned nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(file);
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
if (!fc)
return -EPERM;

@@ -779,15 +771,6 @@ static ssize_t fuse_dev_writev(struct fi
return err;
}

-static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = (char __user *) buf;
- return fuse_dev_writev(file, &iov, 1, off);
-}
-
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -921,10 +904,8 @@ static int fuse_dev_fasync(int fd, struc
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = fuse_dev_read,
- .readv = fuse_dev_readv,
- .write = fuse_dev_write,
- .writev = fuse_dev_writev,
+ .aio_read = fuse_dev_read,
+ .aio_write = fuse_dev_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
Index: linux-2.6.17-rc4/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/hostfs/hostfs_kern.c 2006-05-15 13:56:19.701900616 -0700
+++ linux-2.6.17-rc4/fs/hostfs/hostfs_kern.c 2006-05-15 14:09:43.036775152 -0700
@@ -390,8 +390,6 @@ static const struct file_operations host
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.write = generic_file_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
Index: linux-2.6.17-rc4/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/jfs/file.c 2006-05-15 13:56:19.697901224 -0700
+++ linux-2.6.17-rc4/fs/jfs/file.c 2006-05-15 14:09:43.029776216 -0700
@@ -108,8 +108,6 @@ const struct file_operations jfs_file_op
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.fsync = jfs_fsync,
.release = jfs_release,
Index: linux-2.6.17-rc4/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ntfs/file.c 2006-05-15 13:56:19.700900768 -0700
+++ linux-2.6.17-rc4/fs/ntfs/file.c 2006-05-15 14:09:43.033775608 -0700
@@ -2296,11 +2296,9 @@ const struct file_operations ntfs_file_o
.llseek = generic_file_llseek, /* Seek inside file. */
.read = generic_file_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
.aio_write = ntfs_file_aio_write, /* Async write to file. */
- .writev = ntfs_file_writev, /* Write to file. */
/*.release = ,*/ /* Last file is closed. See
fs/ext2/file.c::
ext2_release_file() for
Index: linux-2.6.17-rc4/fs/pipe.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/pipe.c 2006-05-15 13:56:19.702900464 -0700
+++ linux-2.6.17-rc4/fs/pipe.c 2006-05-15 13:57:11.007101040 -0700
@@ -218,9 +218,10 @@ static struct pipe_buf_operations anon_p
};

static ssize_t
-pipe_readv(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_read(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
int do_wakeup;
@@ -330,17 +331,10 @@ redo:
}

static ssize_t
-pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = buf, .iov_len = count };
-
- return pipe_readv(filp, &iov, 1, ppos);
-}
-
-static ssize_t
-pipe_writev(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_write(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t ppos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
ssize_t ret;
@@ -510,15 +504,6 @@ out:
}

static ssize_t
-pipe_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return pipe_writev(filp, &iov, 1, ppos);
-}
-
-static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
return -EBADF;
@@ -736,8 +721,7 @@ pipe_rdwr_open(struct inode *inode, stru
*/
const struct file_operations read_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -749,8 +733,7 @@ const struct file_operations read_fifo_f
const struct file_operations write_fifo_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -760,10 +743,8 @@ const struct file_operations write_fifo_

const struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
@@ -773,8 +754,7 @@ const struct file_operations rdwr_fifo_f

static struct file_operations read_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -786,8 +766,7 @@ static struct file_operations read_pipe_
static struct file_operations write_pipe_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -797,10 +776,8 @@ static struct file_operations write_pipe

static struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .aio_read = pipe_read,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
Index: linux-2.6.17-rc4/fs/read_write.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/read_write.c 2006-05-15 13:56:19.702900464 -0700
+++ linux-2.6.17-rc4/fs/read_write.c 2006-05-15 14:09:44.399567976 -0700
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -450,6 +451,62 @@ unsigned long iov_shorten(struct iovec *

EXPORT_SYMBOL(iov_shorten);

+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
+
+ for (;;) {
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
+ wait_on_retry_sync_kiocb(&kiocb);
+ }
+
+ if (ret == -EIOCBQUEUED)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
+/* Do it by hand, with file-ops */
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+{
+ struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+ while (nr_segs > 0) {
+ void __user *base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ nr = fn(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -457,12 +514,9 @@ static ssize_t do_readv_writev(int type,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *vector;
+ struct iovec *iov = iovstack;
ssize_t ret;
int seg;
io_fn_t fn;
@@ -532,39 +586,18 @@ static ssize_t do_readv_writev(int type,
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -585,7 +618,7 @@ ssize_t vfs_readv(struct file *file, con
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;

return do_readv_writev(READ, file, vec, vlen, pos);
@@ -598,7 +631,7 @@ ssize_t vfs_writev(struct file *file, co
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;

return do_readv_writev(WRITE, file, vec, vlen, pos);
Index: linux-2.6.17-rc4/fs/read_write.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc4/fs/read_write.h 2006-05-15 13:57:11.008100888 -0700
@@ -0,0 +1,14 @@
+/*
+ * This file is only for sharing some helpers from read_write.c with compat.c.
+ * Don't use anywhere else.
+ */
+
+
+typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
Index: linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/xfs/linux-2.6/xfs_file.c 2006-05-15 13:56:19.698901072 -0700
+++ linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_file.c 2006-05-15 13:57:11.009100736 -0700
@@ -129,94 +129,6 @@ xfs_file_aio_write_invis(
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

-STATIC inline ssize_t
-__xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC inline ssize_t
-__xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
STATIC ssize_t
xfs_file_sendfile(
struct file *filp,
@@ -577,8 +489,6 @@ const struct file_operations xfs_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv,
- .writev = xfs_file_writev,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.sendfile = xfs_file_sendfile,
@@ -601,8 +511,6 @@ const struct file_operations xfs_invis_f
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv_invis,
- .writev = xfs_file_writev_invis,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.sendfile = xfs_file_sendfile_invis,
Index: linux-2.6.17-rc4/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/fs.h 2006-05-15 13:56:19.717898184 -0700
+++ linux-2.6.17-rc4/include/linux/fs.h 2006-05-15 14:09:44.400567824 -0700
@@ -1031,8 +1031,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -1624,10 +1622,6 @@ extern long do_splice_direct(struct file

extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
-ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
Index: linux-2.6.17-rc4/mm/filemap.c
===================================================================
--- linux-2.6.17-rc4.orig/mm/filemap.c 2006-05-15 13:56:19.703900312 -0700
+++ linux-2.6.17-rc4/mm/filemap.c 2006-05-15 14:09:43.037775000 -0700
@@ -2298,42 +2298,6 @@ ssize_t generic_file_write(struct file *
}
EXPORT_SYMBOL(generic_file_write);

-ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-EXPORT_SYMBOL(generic_file_readv);
-
-ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_writev);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc4/net/socket.c
===================================================================
--- linux-2.6.17-rc4.orig/net/socket.c 2006-05-15 13:56:19.716898336 -0700
+++ linux-2.6.17-rc4/net/socket.c 2006-05-15 13:57:11.014099976 -0700
@@ -112,10 +112,6 @@ static long compat_sock_ioctl(struct fil
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);

@@ -138,8 +134,6 @@ static struct file_operations socket_fil
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
- .readv = sock_readv,
- .writev = sock_writev,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
@@ -738,23 +732,6 @@ static ssize_t do_sock_read(struct msghd
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}

-static ssize_t sock_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb iocb;
- struct sock_iocb siocb;
- struct msghdr msg;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -797,23 +774,6 @@ static ssize_t do_sock_write(struct msgh
return __sock_sendmsg(iocb, sock, msg, size);
}

-static ssize_t sock_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct msghdr msg;
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
Index: linux-2.6.17-rc4/sound/core/pcm_native.c
===================================================================
--- linux-2.6.17-rc4.orig/sound/core/pcm_native.c 2006-05-15 13:56:19.716898336 -0700
+++ linux-2.6.17-rc4/sound/core/pcm_native.c 2006-05-15 13:57:11.016099672 -0700
@@ -2819,8 +2819,8 @@ static ssize_t snd_pcm_write(struct file
return result;
}

-static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)

{
struct snd_pcm_file *pcm_file;
@@ -2831,22 +2831,22 @@ static ssize_t snd_pcm_readv(struct file
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, return -ENXIO);
runtime = substream->runtime;
if (runtime->status->state == SNDRV_PCM_STATE_OPEN)
return -EBADFD;
- if (count > 1024 || count != runtime->channels)
+ if (nr_segs > 1024 || nr_segs != runtime->channels)
return -EINVAL;
- if (!frame_aligned(runtime, _vector->iov_len))
+ if (!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_readv(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -2854,8 +2854,8 @@ static ssize_t snd_pcm_readv(struct file
return result;
}

-static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct snd_pcm_file *pcm_file;
struct snd_pcm_substream *substream;
@@ -2865,7 +2865,7 @@ static ssize_t snd_pcm_writev(struct fil
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, result = -ENXIO; goto end);
runtime = substream->runtime;
@@ -2873,17 +2873,17 @@ static ssize_t snd_pcm_writev(struct fil
result = -EBADFD;
goto end;
}
- if (count > 128 || count != runtime->channels ||
- !frame_aligned(runtime, _vector->iov_len)) {
+ if (nr_segs > 128 || nr_segs != runtime->channels ||
+ !frame_aligned(runtime, iov->iov_len)) {
result = -EINVAL;
goto end;
}
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_writev(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -3389,7 +3389,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.write = snd_pcm_write,
- .writev = snd_pcm_writev,
+ .aio_write = snd_pcm_aio_write,
.open = snd_pcm_playback_open,
.release = snd_pcm_release,
.poll = snd_pcm_playback_poll,
@@ -3401,7 +3401,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.read = snd_pcm_read,
- .readv = snd_pcm_readv,
+ .aio_read = snd_pcm_aio_read,
.open = snd_pcm_capture_open,
.release = snd_pcm_release,
.poll = snd_pcm_capture_poll,
Index: linux-2.6.17-rc4/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.17-rc4.orig/Documentation/filesystems/vfs.txt 2006-05-12 09:39:48.000000000 -0700
+++ linux-2.6.17-rc4/Documentation/filesystems/vfs.txt 2006-05-15 14:13:24.374126760 -0700
@@ -715,8 +715,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -736,11 +734,13 @@ otherwise noted.

read: called by read(2) and related system calls

- aio_read: called by io_submit(2) and other asynchronous I/O operations
+ aio_read: called by io_submit(2) and other asynchronous I/O operations,
+ also called to support readv(2) system call

write: called by write(2) and related system calls

- aio_write: called by io_submit(2) and other asynchronous I/O operations
+ aio_write: called by io_submit(2) and other asynchronous I/O operations,
+ also called to support writev(2) system call

readdir: called when the VFS needs to read the directory contents

@@ -780,10 +780,6 @@ otherwise noted.
lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
commands

- readv: called by the readv(2) system call
-
- writev: called by the writev(2) system call
-
sendfile: called by the sendfile(2) system call

get_unmapped_area: called by the mmap(2) system call


2006-05-15 21:22:17

by Badari Pulavarty

[permalink] [raw]
Subject: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

This patch cleans up the generic_file_*_read/write() interfaces.
Christoph Hellwig gave me the idea for these cleanups.

In a nutshell, all filesystems should set the .aio_read/.aio_write
methods and use do_sync_read()/do_sync_write() as their .read/.write
methods. This allows us to clean up all variants of the generic_file_*
routines.

Final available interfaces:

generic_file_aio_read() - read handler
generic_file_aio_write() - write handler
generic_file_aio_write_nolock() - no lock write handler

__generic_file_aio_write_nolock() - internal worker routine


Signed-off-by: Badari Pulavarty <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>

drivers/char/raw.c | 15 +------
fs/adfs/file.c | 6 ++-
fs/affs/file.c | 6 ++-
fs/bfs/file.c | 6 ++-
fs/block_dev.c | 12 +-----
fs/ext2/file.c | 4 +-
fs/fuse/file.c | 6 ++-
fs/hfs/inode.c | 6 ++-
fs/hfsplus/inode.c | 6 ++-
fs/hostfs/hostfs_kern.c | 4 +-
fs/hpfs/file.c | 6 ++-
fs/jffs/inode-v23.c | 6 ++-
fs/jffs2/file.c | 6 ++-
fs/jfs/file.c | 4 +-
fs/minix/file.c | 6 ++-
fs/ntfs/file.c | 2 -
fs/qnx4/file.c | 6 ++-
fs/ramfs/file-mmu.c | 6 ++-
fs/ramfs/file-nommu.c | 6 ++-
fs/read_write.c | 3 +
fs/smbfs/file.c | 24 +++++++-----
fs/sysv/file.c | 6 ++-
fs/udf/file.c | 16 +++++---
fs/ufs/file.c | 6 ++-
fs/xfs/linux-2.6/xfs_lrw.c | 4 +-
include/linux/fs.h | 5 --
mm/filemap.c | 88 ++-------------------------------------------
27 files changed, 105 insertions(+), 166 deletions(-)

Index: linux-2.6.17-rc4/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/char/raw.c 2006-05-15 14:14:00.570624056 -0700
+++ linux-2.6.17-rc4/drivers/char/raw.c 2006-05-15 14:14:12.642788808 -0700
@@ -239,21 +239,10 @@ out:
return err;
}

-static ssize_t raw_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec local_iov = {
- .iov_base = (char __user *)buf,
- .iov_len = count
- };
-
- return generic_file_write_nolock(file, &local_iov, 1, ppos);
-}
-
static struct file_operations raw_fops = {
- .read = generic_file_read,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
- .write = raw_file_write,
+ .write = do_sync_write,
.aio_write = generic_file_aio_write_nolock,
.open = raw_open,
.release= raw_release,
Index: linux-2.6.17-rc4/fs/adfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/adfs/file.c 2006-05-15 14:09:43.030776064 -0700
+++ linux-2.6.17-rc4/fs/adfs/file.c 2006-05-15 14:14:12.642788808 -0700
@@ -27,10 +27,12 @@

const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.fsync = file_fsync,
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.sendfile = generic_file_sendfile,
};

Index: linux-2.6.17-rc4/fs/affs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/affs/file.c 2006-05-15 14:09:43.031775912 -0700
+++ linux-2.6.17-rc4/fs/affs/file.c 2006-05-15 14:14:12.644788504 -0700
@@ -27,8 +27,10 @@ static int affs_file_release(struct inod

const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = affs_file_open,
.release = affs_file_release,
Index: linux-2.6.17-rc4/fs/bfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/bfs/file.c 2006-05-15 14:09:43.028776368 -0700
+++ linux-2.6.17-rc4/fs/bfs/file.c 2006-05-15 14:14:12.644788504 -0700
@@ -19,8 +19,10 @@

const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc4/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/block_dev.c 2006-05-15 14:14:00.574623448 -0700
+++ linux-2.6.17-rc4/fs/block_dev.c 2006-05-15 14:14:12.645788352 -0700
@@ -1056,14 +1056,6 @@ static int blkdev_close(struct inode * i
return blkdev_put(bdev);
}

-static ssize_t blkdev_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return generic_file_write_nolock(file, &local_iov, 1, ppos);
-}
-
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
@@ -1083,8 +1075,8 @@ const struct file_operations def_blk_fop
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = generic_file_read,
- .write = blkdev_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write_nolock,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc4/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ext2/file.c 2006-05-15 14:14:00.577622992 -0700
+++ linux-2.6.17-rc4/fs/ext2/file.c 2006-05-15 14:14:12.646788200 -0700
@@ -41,8 +41,8 @@ static int ext2_release_file (struct ino
*/
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.ioctl = ext2_ioctl,
Index: linux-2.6.17-rc4/fs/fuse/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/fuse/file.c 2006-05-15 14:09:43.031775912 -0700
+++ linux-2.6.17-rc4/fs/fuse/file.c 2006-05-15 14:14:12.646788200 -0700
@@ -621,8 +621,10 @@ static int fuse_set_page_dirty(struct pa

static const struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
Index: linux-2.6.17-rc4/fs/hfs/inode.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/hfs/inode.c 2006-05-15 14:09:43.028776368 -0700
+++ linux-2.6.17-rc4/fs/hfs/inode.c 2006-05-15 14:14:12.647788048 -0700
@@ -603,8 +603,10 @@ int hfs_inode_setattr(struct dentry *den

static const struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc4/fs/hfsplus/inode.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/hfsplus/inode.c 2006-05-15 14:09:43.036775152 -0700
+++ linux-2.6.17-rc4/fs/hfsplus/inode.c 2006-05-15 14:14:12.648787896 -0700
@@ -282,8 +282,10 @@ static struct inode_operations hfsplus_f

static const struct file_operations hfsplus_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6.17-rc4/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/hostfs/hostfs_kern.c 2006-05-15 14:14:00.580622536 -0700
+++ linux-2.6.17-rc4/fs/hostfs/hostfs_kern.c 2006-05-15 14:14:12.649787744 -0700
@@ -386,11 +386,11 @@ int hostfs_fsync(struct file *file, stru

static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .write = generic_file_write,
+ .write = do_sync_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
.release = NULL,
Index: linux-2.6.17-rc4/fs/hpfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/hpfs/file.c 2006-05-15 14:09:43.032775760 -0700
+++ linux-2.6.17-rc4/fs/hpfs/file.c 2006-05-15 14:14:12.649787744 -0700
@@ -113,7 +113,7 @@ static ssize_t hpfs_file_write(struct fi
{
ssize_t retval;

- retval = generic_file_write(file, buf, count, ppos);
+ retval = do_sync_write(file, buf, count, ppos);
if (retval > 0)
hpfs_i(file->f_dentry->d_inode)->i_dirty = 1;
return retval;
@@ -122,8 +122,10 @@ static ssize_t hpfs_file_write(struct fi
const struct file_operations hpfs_file_ops =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.write = hpfs_file_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
Index: linux-2.6.17-rc4/fs/jffs/inode-v23.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/jffs/inode-v23.c 2006-05-15 14:09:43.032775760 -0700
+++ linux-2.6.17-rc4/fs/jffs/inode-v23.c 2006-05-15 14:14:12.651787440 -0700
@@ -1633,8 +1633,10 @@ static const struct file_operations jffs
{
.open = generic_file_open,
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs_fsync,
Index: linux-2.6.17-rc4/fs/jffs2/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/jffs2/file.c 2006-05-15 14:09:43.034775456 -0700
+++ linux-2.6.17-rc4/fs/jffs2/file.c 2006-05-15 14:14:12.652787288 -0700
@@ -42,8 +42,10 @@ const struct file_operations jffs2_file_
{
.llseek = generic_file_llseek,
.open = generic_file_open,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
Index: linux-2.6.17-rc4/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/jfs/file.c 2006-05-15 14:14:00.580622536 -0700
+++ linux-2.6.17-rc4/fs/jfs/file.c 2006-05-15 14:14:12.652787288 -0700
@@ -103,8 +103,8 @@ struct inode_operations jfs_file_inode_o
const struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
- .write = generic_file_write,
- .read = generic_file_read,
+ .write = do_sync_write,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc4/fs/minix/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/minix/file.c 2006-05-15 14:09:43.034775456 -0700
+++ linux-2.6.17-rc4/fs/minix/file.c 2006-05-15 14:14:12.653787136 -0700
@@ -17,8 +17,10 @@ int minix_sync_file(struct file *, struc

const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = minix_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc4/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ntfs/file.c 2006-05-15 14:14:00.582622232 -0700
+++ linux-2.6.17-rc4/fs/ntfs/file.c 2006-05-15 14:14:12.654786984 -0700
@@ -2294,7 +2294,7 @@ static int ntfs_file_fsync(struct file *

const struct file_operations ntfs_file_ops = {
.llseek = generic_file_llseek, /* Seek inside file. */
- .read = generic_file_read, /* Read from file. */
+ .read = do_sync_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
Index: linux-2.6.17-rc4/fs/qnx4/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/qnx4/file.c 2006-05-15 14:09:43.033775608 -0700
+++ linux-2.6.17-rc4/fs/qnx4/file.c 2006-05-15 14:14:12.655786832 -0700
@@ -22,11 +22,13 @@
const struct file_operations qnx4_file_operations =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
#ifdef CONFIG_QNX4FS_RW
- .write = generic_file_write,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = qnx4_sync_file,
#endif
};
Index: linux-2.6.17-rc4/fs/ramfs/file-mmu.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ramfs/file-mmu.c 2006-05-15 14:09:43.035775304 -0700
+++ linux-2.6.17-rc4/fs/ramfs/file-mmu.c 2006-05-15 14:14:12.655786832 -0700
@@ -33,8 +33,10 @@ struct address_space_operations ramfs_ao
};

const struct file_operations ramfs_file_operations = {
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc4/fs/ramfs/file-nommu.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ramfs/file-nommu.c 2006-05-15 14:09:43.035775304 -0700
+++ linux-2.6.17-rc4/fs/ramfs/file-nommu.c 2006-05-15 14:14:12.656786680 -0700
@@ -36,8 +36,10 @@ struct address_space_operations ramfs_ao
const struct file_operations ramfs_file_operations = {
.mmap = ramfs_nommu_mmap,
.get_unmapped_area = ramfs_nommu_get_unmapped_area,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
.llseek = generic_file_llseek,
Index: linux-2.6.17-rc4/fs/read_write.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/read_write.c 2006-05-15 14:14:02.693301360 -0700
+++ linux-2.6.17-rc4/fs/read_write.c 2006-05-15 14:14:12.657786528 -0700
@@ -22,7 +22,8 @@

const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6.17-rc4/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/fs.h 2006-05-15 14:14:02.695301056 -0700
+++ linux-2.6.17-rc4/include/linux/fs.h 2006-05-15 14:14:12.658786376 -0700
@@ -1594,11 +1594,8 @@ extern int generic_file_mmap(struct file
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
-extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
@@ -1608,8 +1605,6 @@ extern ssize_t generic_file_buffered_wri
unsigned long, loff_t, loff_t *, size_t, ssize_t);
extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
Index: linux-2.6.17-rc4/mm/filemap.c
===================================================================
--- linux-2.6.17-rc4.orig/mm/filemap.c 2006-05-15 14:14:00.589621168 -0700
+++ linux-2.6.17-rc4/mm/filemap.c 2006-05-15 14:14:12.660786072 -0700
@@ -1050,13 +1050,14 @@ success:
* that can use the page cache directly.
*/
ssize_t
-__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg;
size_t count;
+ loff_t *ppos = &iocb->ki_pos;

count = 0;
for (seg = 0; seg < nr_segs; seg++) {
@@ -1080,7 +1081,7 @@ __generic_file_aio_read(struct kiocb *io

/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (filp->f_flags & O_DIRECT) {
- loff_t pos = *ppos, size;
+ loff_t size;
struct address_space *mapping;
struct inode *inode;

@@ -1125,33 +1126,8 @@ out:
return retval;
}

-EXPORT_SYMBOL(__generic_file_aio_read);
-
-ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- BUG_ON(iocb->ki_pos != pos);
- return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos);
-}
EXPORT_SYMBOL(generic_file_aio_read);

-ssize_t
-generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec local_iov = { .iov_base = buf, .iov_len = count };
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-EXPORT_SYMBOL(generic_file_read);
-
int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
ssize_t written;
@@ -2217,38 +2193,6 @@ ssize_t generic_file_aio_write_nolock(st
}
EXPORT_SYMBOL(generic_file_aio_write_nolock);

-static ssize_t
-__generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-
-ssize_t
-generic_file_write_nolock(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write_nolock);
-
ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -2274,30 +2218,6 @@ ssize_t generic_file_aio_write(struct ki
}
EXPORT_SYMBOL(generic_file_aio_write);

-ssize_t generic_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
- struct iovec local_iov = { .iov_base = (void __user *)buf,
- .iov_len = count };
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- ssize_t err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_write);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_lrw.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/xfs/linux-2.6/xfs_lrw.c 2006-05-15 14:09:43.030776064 -0700
+++ linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_lrw.c 2006-05-15 14:14:12.661785920 -0700
@@ -276,7 +276,9 @@ xfs_read(

xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags);
- ret = __generic_file_aio_read(iocb, iovp, segs, offset);
+
+ iocb->ki_pos = *offset;
+ ret = generic_file_aio_read(iocb, iovp, segs, *offset);
if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
ret = wait_on_sync_kiocb(iocb);
if (ret > 0)
Index: linux-2.6.17-rc4/fs/smbfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/smbfs/file.c 2006-05-15 14:09:43.035775304 -0700
+++ linux-2.6.17-rc4/fs/smbfs/file.c 2006-05-15 14:14:12.662785768 -0700
@@ -214,13 +214,15 @@ smb_updatepage(struct file *file, struct
}

static ssize_t
-smb_file_read(struct file * file, char __user * buf, size_t count, loff_t *ppos)
+smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file * file = iocb->ki_filp;
struct dentry * dentry = file->f_dentry;
ssize_t status;

VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
- (unsigned long) count, (unsigned long) *ppos);
+ (unsigned long) iocb->ki_left, (unsigned long) pos);

status = smb_revalidate_inode(dentry);
if (status) {
@@ -233,7 +235,7 @@ smb_file_read(struct file * file, char _
(long)dentry->d_inode->i_size,
dentry->d_inode->i_flags, dentry->d_inode->i_atime);

- status = generic_file_read(file, buf, count, ppos);
+ status = generic_file_aio_read(iocb, iov, nr_segs, pos);
out:
return status;
}
@@ -317,14 +319,16 @@ struct address_space_operations smb_file
* Write to a file (through the page cache).
*/
static ssize_t
-smb_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file * file = iocb->ki_filp;
struct dentry * dentry = file->f_dentry;
ssize_t result;

VERBOSE("file %s/%s, count=%lu@%lu\n",
DENTRY_PATH(dentry),
- (unsigned long) count, (unsigned long) *ppos);
+ (unsigned long) iocb->ki_left, (unsigned long) pos);

result = smb_revalidate_inode(dentry);
if (result) {
@@ -337,8 +341,8 @@ smb_file_write(struct file *file, const
if (result)
goto out;

- if (count > 0) {
- result = generic_file_write(file, buf, count, ppos);
+ if (iocb->ki_left > 0) {
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
(long) file->f_pos, (long) dentry->d_inode->i_size,
dentry->d_inode->i_mtime, dentry->d_inode->i_atime);
@@ -402,8 +406,10 @@ smb_file_permission(struct inode *inode,
const struct file_operations smb_file_operations =
{
.llseek = remote_llseek,
- .read = smb_file_read,
- .write = smb_file_write,
+ .read = do_sync_read,
+ .aio_read = smb_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = smb_file_aio_write,
.ioctl = smb_ioctl,
.mmap = smb_file_mmap,
.open = smb_file_open,
Index: linux-2.6.17-rc4/fs/sysv/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/sysv/file.c 2006-05-15 14:09:43.033775608 -0700
+++ linux-2.6.17-rc4/fs/sysv/file.c 2006-05-15 14:14:12.663785616 -0700
@@ -21,8 +21,10 @@
*/
const struct file_operations sysv_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = sysv_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc4/fs/udf/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/udf/file.c 2006-05-15 14:09:43.029776216 -0700
+++ linux-2.6.17-rc4/fs/udf/file.c 2006-05-15 14:14:12.663785616 -0700
@@ -103,19 +103,21 @@ struct address_space_operations udf_adin
.commit_write = udf_adinicb_commit_write,
};

-static ssize_t udf_file_write(struct file * file, const char __user * buf,
- size_t count, loff_t *ppos)
+static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t ppos)
{
ssize_t retval;
+ struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
int err, pos;
+ size_t count = iocb->ki_left;

if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB)
{
if (file->f_flags & O_APPEND)
pos = inode->i_size;
else
- pos = *ppos;
+ pos = ppos;

if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) +
pos + count))
@@ -136,7 +138,7 @@ static ssize_t udf_file_write(struct fil
}
}

- retval = generic_file_write(file, buf, count, ppos);
+ retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);

if (retval > 0)
mark_inode_dirty(inode);
@@ -249,11 +251,13 @@ static int udf_release_file(struct inode
}

const struct file_operations udf_file_operations = {
- .read = generic_file_read,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
.ioctl = udf_ioctl,
.open = generic_file_open,
.mmap = generic_file_mmap,
- .write = udf_file_write,
+ .write = do_sync_write,
+ .aio_write = udf_file_aio_write,
.release = udf_release_file,
.fsync = udf_fsync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6.17-rc4/fs/ufs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ufs/file.c 2006-05-15 14:09:43.029776216 -0700
+++ linux-2.6.17-rc4/fs/ufs/file.c 2006-05-15 14:14:12.664785464 -0700
@@ -33,8 +33,10 @@

const struct file_operations ufs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = generic_file_open,
.sendfile = generic_file_sendfile,


2006-05-15 22:28:58

by Nathan Scott

[permalink] [raw]
Subject: Re: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

Hi Badari,

On Mon, May 15, 2006 at 02:23:26PM -0700, Badari Pulavarty wrote:
> This patch cleans up generic_file_*_read/write() interfaces.
> Christoph Hellwig gave me the idea for this clean ups.
>
> In a nutshell, all filesystems should set .aio_read/.aio_write
> methods and use do_sync_read/ do_sync_write() as their .read/.write

I know it's not something you're introducing here, but the naming
convention do_sync_read/do_sync_write is pretty confusing (with it
not actually being a sync write and all, in the usual case).
Any chance that could be renamed to something that's a bit clearer,
maybe generic_file_non_aio_read and generic_file_non_aio_write?
There don't seem to be many callsites (so not a huge change) and
it'd seem a good time to do it, alongside these other changes.

> methods. This allows us to cleanup all variants of generic_file_*
> routines.
>
> Final available interfaces:
>
> generic_file_aio_read() - read handler
> generic_file_aio_write() - write handler
> generic_file_aio_write_nolock() - no lock write handler

thanks!

--
Nathan

2006-05-15 22:40:10

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

Nathan Scott <[email protected]> wrote:
>
> Any chance that could be renamed to something thats a bit clearer,
> maybe generic_file_non_aio_read and generic_file_non_aio_write?

I guess that logically, we should avoid the double-negative and use
generic_file_sio_*.

I dunno if we want to be that logical though ;)

2006-05-15 22:47:23

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups



Nathan Scott wrote:

>Hi Badari,
>
>On Mon, May 15, 2006 at 02:23:26PM -0700, Badari Pulavarty wrote:
>
>>This patch cleans up generic_file_*_read/write() interfaces.
>>Christoph Hellwig gave me the idea for this clean ups.
>>
>>In a nutshell, all filesystems should set .aio_read/.aio_write
>>methods and use do_sync_read/ do_sync_write() as their .read/.write
>>
>
>I know its not something you're introducing here, but the naming
>convention do_sync_read/do_sync_write is pretty confused (with it
>not actually being a sync write and all, in the usual case).
>Any chance that could be renamed to something thats a bit clearer,
>maybe generic_file_non_aio_read and generic_file_non_aio_write?
>There don't seem to be many callsites (so not a huge change) and
>it'd seem a good time to do it, alongside these other changes.
>

You mean "left-in-pagecache-not-really-written-to-disk" synchronous?
Yeah, I see it.
I prefer generic_file_aio_read_and_wait() and
generic_file_aio_write_and_wait() - but those are ugly too :(

I also have a small issue with the current do_sync_*() routines - if
someone calls them without setting their ->aio_read()/->aio_write(),
we panic. Maybe we should add a BUG_ON(), but again I don't want to
slow things down.

Thanks,
Badari



2006-05-15 22:57:14

by Nathan Scott

[permalink] [raw]
Subject: Re: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

On Mon, May 15, 2006 at 03:42:40PM -0700, Andrew Morton wrote:
> Nathan Scott <[email protected]> wrote:
> > Any chance that could be renamed to something thats a bit clearer,
> > maybe generic_file_non_aio_read and generic_file_non_aio_write?
>
> I guess that logically, we should avoid the double-negative and use
> generic_file_sio_*.

"s" as in "sync"? But its not sync.

> I dunno if we want to be that logical though ;)

The real problem I guess is that "aio" isn't clear enough, as there
are different types of async io. Maybe generic_posix_aio_* versus
generic_file_aio_* - *shrug*, that's probably not much better really.
Pretty much anything would be better than do_sync_write (describing
buffered not-sync writes) though. :)

On Mon, May 15, 2006 at 03:47:15PM -0700, Badari Pulavarty wrote:
> You mean "left-in-pagecache-not-really-written-to-disk" synchronous ?

Heh - yes, that's the one (you have a contradiction in terms there -
if it's the former, it's not the latter ;) )

> Yeah. I see it..
> I prefer, generic_file_aio_read_and_wait(),
> generic_file_aio_write_and_wait() - but

Well, yeah - maybe - getting a bit long winded, but thats possibly the
best option so far.

> I also have a small issue with the current do_sync_*() routines - if
> some one calls it
> without setting their ->aio_read()/->aio_write(), we panic. May be we

Hmm. I imagine the author of the fs code would quickly find out they'd
made that mistake though, and it'd fail in a fairly easily debuggable
way, so perhaps not really a big issue in practice.

cheers.

--
Nathan

2006-05-16 10:51:32

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 4/4] Streamline generic_file_* interfaces and filemap cleanups

On Tue, May 16, 2006 at 08:28:04AM +1000, Nathan Scott wrote:
> I know its not something you're introducing here, but the naming
> convention do_sync_read/do_sync_write is pretty confused (with it
> not actually being a sync write and all, in the usual case).
> Any chance that could be renamed to something thats a bit clearer,
> maybe generic_file_non_aio_read and generic_file_non_aio_write?
> There don't seem to be many callsites (so not a huge change) and
> it'd seem a good time to do it, alongside these other changes.

I agree that the current naming is rather odd. generic_file_* on the
other hand would be completely wrong - generic_file_* are the generic
pagecache routines, these are wrappers to use ->aio_read/->aio_write.

Currently I can't imagine a better name, though.

2006-05-22 01:01:05

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Badari Pulavarty <[email protected]> wrote:
>
> This patch removes readv() and writev() methods and replaces
> them with aio_read()/aio_write() methods.

And it breaks autofs4

autofs: pipe file descriptor does not contain proper ops

2006-05-22 04:39:37

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead



Andrew Morton wrote:

>Badari Pulavarty <[email protected]> wrote:
>
>>This patch removes readv() and writev() methods and replaces
>> them with aio_read()/aio_write() methods.
>>
>
>And it breaks autofs4
>
>autofs: pipe file descriptor does not contain proper ops
>

Yuck. I will take a look. Unfortunately, I am travelling next week.
It will have to wait for at least 10 days or so. :(

Thanks,
Badari

>


2006-05-22 05:35:36

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Sun, May 21, 2006 at 06:00:37PM -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > This patch removes readv() and writev() methods and replaces
> > them with aio_read()/aio_write() methods.
>
> And it breaks autofs4
>
> autofs: pipe file descriptor does not contain proper ops

This happens because the autofs4 pipe fd doesn't have a write file
operation.

Badari do you remember any place in your patches where you didn't
add do_sync_write for a file_operations instance?

Ian, what kind of file is the autofs4 pipe? is it a named pipe or
a fifo or a "real" file?

2006-05-22 08:16:40

by Ian Kent

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Christoph Hellwig wrote:
> On Sun, May 21, 2006 at 06:00:37PM -0700, Andrew Morton wrote:
>> Badari Pulavarty <[email protected]> wrote:
>>> This patch removes readv() and writev() methods and replaces
>>> them with aio_read()/aio_write() methods.
>> And it breaks autofs4
>>
>> autofs: pipe file descriptor does not contain proper ops
>
> this comes because the autofs4 pipe fd doesn't have a write file
> operations.
>
> Badari do you remember any place in your patches where you didn't
> add do_sync_write for a file_operations instance?
>
> Ian, what kind of file is the autofs4 pipe? is it a named pipe or
> a fifo or a "real" file?

The autofs4 fill_super does an fget on a pipe file handle passed in the
mount options. It uses the write method of the returned file struct to
send packets back to the daemon. The fill_super method checks to see if
there is a write method present in the returned file struct and emits
this message if it's not found.

Ian


2006-05-22 08:19:58

by Ian Kent

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Christoph Hellwig wrote:
> On Sun, May 21, 2006 at 06:00:37PM -0700, Andrew Morton wrote:
>> Badari Pulavarty <[email protected]> wrote:
>>> This patch removes readv() and writev() methods and replaces
>>> them with aio_read()/aio_write() methods.
>> And it breaks autofs4
>>
>> autofs: pipe file descriptor does not contain proper ops
>
> this comes because the autofs4 pipe fd doesn't have a write file
> operations.
>
> Badari do you remember any place in your patches where you didn't
> add do_sync_write for a file_operations instance?
>
> Ian, what kind of file is the autofs4 pipe? is it a named pipe or
> a fifo or a "real" file?

Ahh. Sorry missed the actual question.
It's a FIFO created with pipe(2).

2006-05-22 09:29:46

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Christoph Hellwig <[email protected]> wrote:
>
> On Sun, May 21, 2006 at 06:00:37PM -0700, Andrew Morton wrote:
> > Badari Pulavarty <[email protected]> wrote:
> > >
> > > This patch removes readv() and writev() methods and replaces
> > > them with aio_read()/aio_write() methods.
> >
> > And it breaks autofs4
> >
> > autofs: pipe file descriptor does not contain proper ops
>
> this comes because the autofs4 pipe fd doesn't have a write file
> operations.
>

Note that fs/autofs/inode.c does the same thing.

2006-05-22 09:36:04

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Andrew Morton <[email protected]> wrote:
>
> Christoph Hellwig <[email protected]> wrote:
> >
> > On Sun, May 21, 2006 at 06:00:37PM -0700, Andrew Morton wrote:
> > > Badari Pulavarty <[email protected]> wrote:
> > > >
> > > > This patch removes readv() and writev() methods and replaces
> > > > them with aio_read()/aio_write() methods.
> > >
> > > And it breaks autofs4
> > >
> > > autofs: pipe file descriptor does not contain proper ops
> >
> > this comes because the autofs4 pipe fd doesn't have a write file
> > operations.
> >
>
> Note that fs/autofs/inode.c does the same thing.

The loop driver plays with file_operations.write() also. The code should
be reviewed and tested against filesystems which use LO_FLAGS_USE_AOPS as
well as against those which do not, please.

2006-05-22 10:33:24

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Mon, May 22, 2006 at 02:35:19AM -0700, Andrew Morton wrote:
> The loop driver plays with file_operations.write() also. The code should
> be reviewed and tested against filesystems which use LO_FLAGS_USE_AOPS as
> well as against those which do not, please.

The LO_FLAGS_USE_AOPS stuff is broken, please drop it from -mm. I
explained to the Red Hat guy in detail how to get it right.

That being said, the bug isn't autofs using ->write directly, which is
done in a lot of places, but the pipe code not setting it to
do_sync_write.

2006-05-22 10:44:56

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Christoph Hellwig <[email protected]> wrote:
>
> On Mon, May 22, 2006 at 02:35:19AM -0700, Andrew Morton wrote:
> > The loop driver plays with file_operations.write() also. The code should
> > be reviewed and tested against filesystems which use LO_FLAGS_USE_AOPS as
> > well as against those which do not, please.
>
> The LO_FLAGS_USE_AOPS stuff is broken, please drop it from -mm.

Did, ages ago. I was referring to the present mainline loop
implementation.

2006-05-22 10:51:47

by Ian Kent

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Mon, 22 May 2006, Christoph Hellwig wrote:

> On Mon, May 22, 2006 at 02:35:19AM -0700, Andrew Morton wrote:
> > The loop driver plays with file_operations.write() also. The code should
> > be reviewed and tested against filesystems which use LO_FLAGS_USE_AOPS as
> > well as against those which do not, please.
>
> The LO_FLAGS_USE_AOPS stuff is broken, please drop it from -mm. I
> explained to the RedHAt guy in detail on how to get it right.
>
> That beeing said the bu isn't autofs using ->write directly which is
> done in a lot of places but the pipe code not setting it to
> do_sync_write.
>

If there's anything I need to do to help just point me in the right
direction.

The brief look at the patch that I had left me thinking the read and write
methods were not set but I probably missed something obvious and didn't
return to look again.

Ian

2006-05-22 14:59:11

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Sun, 2006-05-21 at 18:00 -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > This patch removes readv() and writev() methods and replaces
> > them with aio_read()/aio_write() methods.
>
> And it breaks autofs4
>
> autofs: pipe file descriptor does not contain proper ops
>

Any easy test case to reproduce the problem ?

Thanks,
Badari

2006-05-22 15:23:07

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Mon, 2006-05-22 at 07:34 +0200, Christoph Hellwig wrote:
> On Sun, May 21, 2006 at 06:00:37PM -0700, Andrew Morton wrote:
> > Badari Pulavarty <[email protected]> wrote:
> > >
> > > This patch removes readv() and writev() methods and replaces
> > > them with aio_read()/aio_write() methods.
> >
> > And it breaks autofs4
> >
> > autofs: pipe file descriptor does not contain proper ops
>
> this comes because the autofs4 pipe fd doesn't have a write file
> operations.
>
> Badari do you remember any place in your patches where you didn't
> add do_sync_write for a file_operations instance?
>

Yes. Your original patchset removed a bunch of .write methods without
setting them to do_sync_write. Since the regular read(2)/write(2)
would automatically use do_sync_* and can make use of the aio methods
anyway - I assumed they were correct.

I guess I need to convert them to do_sync_write() instead. Do we
need to look at .read also?

"grep" shows:

- .write = tun_chr_write,
- .write = fuse_dev_write,
- .write = pipe_write,
- .write = pipe_write,
- .write = pipe_write,
- .write = pipe_write,


Here is the updated patch which adds do_sync_read/do_sync_write for
these places.

Thanks,
Badari

This patch removes readv() and writev() methods and replaces
them with aio_read()/aio_write() methods.

Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Badari Pulavarty <[email protected]>

drivers/char/raw.c | 2
drivers/net/tun.c | 37 ++++------------
fs/bad_inode.c | 2
fs/block_dev.c | 2
fs/cifs/cifsfs.c | 16 ------
fs/compat.c | 44 ++++---------------
fs/ext2/file.c | 2
fs/ext3/file.c | 2
fs/fat/file.c | 2
fs/fuse/dev.c | 37 ++++------------
fs/hostfs/hostfs_kern.c | 2
fs/jfs/file.c | 2
fs/ntfs/file.c | 2
fs/pipe.c | 59 +++++++++----------------
fs/read_write.c | 101 +++++++++++++++++++++++++++++---------------
fs/read_write.h | 14 ++++++
fs/xfs/linux-2.6/xfs_file.c | 92 ----------------------------------------
include/linux/fs.h | 6 --
mm/filemap.c | 36 ---------------
net/socket.c | 40 -----------------
sound/core/pcm_native.c | 40 ++++++++---------
21 files changed, 154 insertions(+), 386 deletions(-)

Index: linux-2.6.17-rc4/drivers/char/raw.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/char/raw.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/drivers/char/raw.c 2006-05-22 09:24:13.000000000 -0700
@@ -258,8 +258,6 @@ static struct file_operations raw_fops =
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.owner = THIS_MODULE,
};

Index: linux-2.6.17-rc4/drivers/net/tun.c
===================================================================
--- linux-2.6.17-rc4.orig/drivers/net/tun.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/drivers/net/tun.c 2006-05-22 09:23:17.000000000 -0700
@@ -289,11 +289,10 @@ static inline size_t iov_total(const str
return len;
}

-/* Writev */
-static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
- struct tun_struct *tun = file->private_data;
+ struct tun_struct *tun = iocb->ki_filp->private_data;

if (!tun)
return -EBADFD;
@@ -303,14 +302,6 @@ static ssize_t tun_chr_writev(struct fil
return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}

-/* Write */
-static ssize_t tun_chr_write(struct file * file, const char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { (void __user *) buf, count };
- return tun_chr_writev(file, &iv, 1, pos);
-}
-
/* Put packet to the user space buffer */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
struct sk_buff *skb,
@@ -344,10 +335,10 @@ static __inline__ ssize_t tun_put_user(s
return total;
}

-/* Readv */
-static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
- unsigned long count, loff_t *pos)
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
{
+ struct file *file = iocb->ki_filp;
struct tun_struct *tun = file->private_data;
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
@@ -427,14 +418,6 @@ static ssize_t tun_chr_readv(struct file
return ret;
}

-/* Read */
-static ssize_t tun_chr_read(struct file * file, char __user * buf,
- size_t count, loff_t *pos)
-{
- struct iovec iv = { buf, count };
- return tun_chr_readv(file, &iv, 1, pos);
-}
-
static void tun_setup(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -762,10 +745,10 @@ static int tun_chr_close(struct inode *i
static struct file_operations tun_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = tun_chr_read,
- .readv = tun_chr_readv,
- .write = tun_chr_write,
- .writev = tun_chr_writev,
+ .read = do_sync_read,
+ .aio_read = tun_chr_aio_read,
+ .write = do_sync_write,
+ .aio_write = tun_chr_aio_write,
.poll = tun_chr_poll,
.ioctl = tun_chr_ioctl,
.open = tun_chr_open,
Index: linux-2.6.17-rc4/fs/bad_inode.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/bad_inode.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/bad_inode.c 2006-05-22 09:23:17.000000000 -0700
@@ -40,8 +40,6 @@ static const struct file_operations bad_
.aio_fsync = EIO_ERROR,
.fasync = EIO_ERROR,
.lock = EIO_ERROR,
- .readv = EIO_ERROR,
- .writev = EIO_ERROR,
.sendfile = EIO_ERROR,
.sendpage = EIO_ERROR,
.get_unmapped_area = EIO_ERROR,
Index: linux-2.6.17-rc4/fs/block_dev.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/block_dev.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/block_dev.c 2006-05-22 09:24:13.000000000 -0700
@@ -1093,8 +1093,6 @@ const struct file_operations def_blk_fop
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .readv = generic_file_readv,
- .writev = generic_file_write_nolock,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc4/fs/cifs/cifsfs.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/cifs/cifsfs.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/cifs/cifsfs.c 2006-05-22 09:23:17.000000000 -0700
@@ -484,18 +484,6 @@ cifs_get_sb(struct file_system_type *fs_
return sb;
}

-static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct inode *inode = file->f_dentry->d_inode;
- ssize_t written;
-
- written = generic_file_writev(file, iov, nr_segs, ppos);
- if (!CIFS_I(inode)->clientCanCacheAll)
- filemap_fdatawrite(inode->i_mapping);
- return written;
-}
-
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -581,8 +569,6 @@ struct inode_operations cifs_symlink_ino
const struct file_operations cifs_file_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
@@ -624,8 +610,6 @@ const struct file_operations cifs_file_d
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
Index: linux-2.6.17-rc4/fs/compat.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/compat.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/compat.c 2006-05-22 09:23:17.000000000 -0700
@@ -55,6 +55,8 @@

extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);

+#include "read_write.h"
+
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
@@ -1139,9 +1141,6 @@ static ssize_t compat_do_readv_writev(in
const struct compat_iovec __user *uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov=iovstack, *vector;
@@ -1224,39 +1223,18 @@ static ssize_t compat_do_readv_writev(in
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -1284,7 +1262,7 @@ compat_sys_readv(unsigned long fd, const
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
goto out;

ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
@@ -1307,7 +1285,7 @@ compat_sys_writev(unsigned long fd, cons
goto out;

ret = -EINVAL;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
goto out;

ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
Index: linux-2.6.17-rc4/fs/ext2/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ext2/file.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/ext2/file.c 2006-05-22 09:24:13.000000000 -0700
@@ -50,8 +50,6 @@ const struct file_operations ext2_file_o
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
Index: linux-2.6.17-rc4/fs/ext3/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ext3/file.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/ext3/file.c 2006-05-22 09:23:17.000000000 -0700
@@ -112,8 +112,6 @@ const struct file_operations ext3_file_o
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.ioctl = ext3_ioctl,
.mmap = generic_file_mmap,
.open = generic_file_open,
Index: linux-2.6.17-rc4/fs/fat/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/fat/file.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/fat/file.c 2006-05-22 09:23:17.000000000 -0700
@@ -116,8 +116,6 @@ const struct file_operations fat_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6.17-rc4/fs/fuse/dev.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/fuse/dev.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/fuse/dev.c 2006-05-22 09:23:17.000000000 -0700
@@ -585,14 +585,15 @@ static void request_wait(struct fuse_con
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_copy_state cs;
unsigned reqsize;
+ struct file *file = iocb->ki_filp;
struct fuse_conn *fc = fuse_get_conn(file);
if (!fc)
return -EPERM;
@@ -658,15 +659,6 @@ static ssize_t fuse_dev_readv(struct fil
return err;
}

-static ssize_t fuse_dev_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = buf;
- return fuse_dev_readv(file, &iov, 1, off);
-}
-
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
@@ -711,15 +703,15 @@ static int copy_out_args(struct fuse_cop
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *off)
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
int err;
unsigned nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(file);
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
if (!fc)
return -EPERM;

@@ -779,15 +771,6 @@ static ssize_t fuse_dev_writev(struct fi
return err;
}

-static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
- size_t nbytes, loff_t *off)
-{
- struct iovec iov;
- iov.iov_len = nbytes;
- iov.iov_base = (char __user *) buf;
- return fuse_dev_writev(file, &iov, 1, off);
-}
-
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -921,10 +904,10 @@ static int fuse_dev_fasync(int fd, struc
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.llseek = no_llseek,
- .read = fuse_dev_read,
- .readv = fuse_dev_readv,
- .write = fuse_dev_write,
- .writev = fuse_dev_writev,
+ .read = do_sync_read,
+ .aio_read = fuse_dev_read,
+ .write = do_sync_write,
+ .aio_write = fuse_dev_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
Index: linux-2.6.17-rc4/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/hostfs/hostfs_kern.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/hostfs/hostfs_kern.c 2006-05-22 09:24:13.000000000 -0700
@@ -390,8 +390,6 @@ static const struct file_operations host
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.write = generic_file_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
Index: linux-2.6.17-rc4/fs/jfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/jfs/file.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/jfs/file.c 2006-05-22 09:24:13.000000000 -0700
@@ -108,8 +108,6 @@ const struct file_operations jfs_file_op
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.fsync = jfs_fsync,
.release = jfs_release,
Index: linux-2.6.17-rc4/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/ntfs/file.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/ntfs/file.c 2006-05-22 09:24:13.000000000 -0700
@@ -2296,11 +2296,9 @@ const struct file_operations ntfs_file_o
.llseek = generic_file_llseek, /* Seek inside file. */
.read = generic_file_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
.aio_write = ntfs_file_aio_write, /* Async write to file. */
- .writev = ntfs_file_writev, /* Write to file. */
/*.release = ,*/ /* Last file is closed. See
fs/ext2/file.c::
ext2_release_file() for
Index: linux-2.6.17-rc4/fs/pipe.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/pipe.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/pipe.c 2006-05-22 09:23:17.000000000 -0700
@@ -218,9 +218,10 @@ static struct pipe_buf_operations anon_p
};

static ssize_t
-pipe_readv(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_read(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t pos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
int do_wakeup;
@@ -330,17 +331,10 @@ redo:
}

static ssize_t
-pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = buf, .iov_len = count };
-
- return pipe_readv(filp, &iov, 1, ppos);
-}
-
-static ssize_t
-pipe_writev(struct file *filp, const struct iovec *_iov,
- unsigned long nr_segs, loff_t *ppos)
+pipe_write(struct kiocb *iocb, const struct iovec *_iov,
+ unsigned long nr_segs, loff_t ppos)
{
+ struct file *filp = iocb->ki_filp;
struct inode *inode = filp->f_dentry->d_inode;
struct pipe_inode_info *pipe;
ssize_t ret;
@@ -510,15 +504,6 @@ out:
}

static ssize_t
-pipe_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
-
- return pipe_writev(filp, &iov, 1, ppos);
-}
-
-static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
return -EBADF;
@@ -736,8 +721,8 @@ pipe_rdwr_open(struct inode *inode, stru
*/
const struct file_operations read_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .read = do_sync_read,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -749,8 +734,8 @@ const struct file_operations read_fifo_f
const struct file_operations write_fifo_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .write = do_sync_write,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -760,10 +745,10 @@ const struct file_operations write_fifo_

const struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .read = do_sync_read,
+ .aio_read = pipe_read,
+ .write = do_sync_write,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
@@ -773,8 +758,8 @@ const struct file_operations rdwr_fifo_f

static struct file_operations read_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
+ .read = do_sync_read,
+ .aio_read = pipe_read,
.write = bad_pipe_w,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
@@ -786,8 +771,8 @@ static struct file_operations read_pipe_
static struct file_operations write_pipe_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
- .write = pipe_write,
- .writev = pipe_writev,
+ .write = do_sync_write,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
@@ -797,10 +782,10 @@ static struct file_operations write_pipe

static struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
- .read = pipe_read,
- .readv = pipe_readv,
- .write = pipe_write,
- .writev = pipe_writev,
+ .read = do_sync_read,
+ .aio_read = pipe_read,
+ .write = do_sync_write,
+ .aio_write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
Index: linux-2.6.17-rc4/fs/read_write.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/read_write.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/read_write.c 2006-05-22 09:24:14.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -450,6 +451,62 @@ unsigned long iov_shorten(struct iovec *

EXPORT_SYMBOL(iov_shorten);

+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
+
+ for (;;) {
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
+ wait_on_retry_sync_kiocb(&kiocb);
+ }
+
+ if (ret == -EIOCBQUEUED)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
+/* Do it by hand, with file-ops */
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+{
+ struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+ while (nr_segs > 0) {
+ void __user *base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ nr = fn(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -457,12 +514,9 @@ static ssize_t do_readv_writev(int type,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *vector;
+ struct iovec *iov = iovstack;
ssize_t ret;
int seg;
io_fn_t fn;
@@ -532,39 +586,18 @@ static ssize_t do_readv_writev(int type,
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -585,7 +618,7 @@ ssize_t vfs_readv(struct file *file, con
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;

return do_readv_writev(READ, file, vec, vlen, pos);
@@ -598,7 +631,7 @@ ssize_t vfs_writev(struct file *file, co
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;

return do_readv_writev(WRITE, file, vec, vlen, pos);
Index: linux-2.6.17-rc4/fs/read_write.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc4/fs/read_write.h 2006-05-22 09:23:17.000000000 -0700
@@ -0,0 +1,14 @@
+/*
+ * This file is only for sharing some helpers from read_write.c with compat.c.
+ * Don't use anywhere else.
+ */
+
+
+typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
Index: linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.17-rc4.orig/fs/xfs/linux-2.6/xfs_file.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/fs/xfs/linux-2.6/xfs_file.c 2006-05-22 09:23:17.000000000 -0700
@@ -129,94 +129,6 @@ xfs_file_aio_write_invis(
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}

-STATIC inline ssize_t
-__xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC inline ssize_t
-__xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
STATIC ssize_t
xfs_file_sendfile(
struct file *filp,
@@ -577,8 +489,6 @@ const struct file_operations xfs_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv,
- .writev = xfs_file_writev,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.sendfile = xfs_file_sendfile,
@@ -601,8 +511,6 @@ const struct file_operations xfs_invis_f
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv_invis,
- .writev = xfs_file_writev_invis,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.sendfile = xfs_file_sendfile_invis,
Index: linux-2.6.17-rc4/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc4.orig/include/linux/fs.h 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/include/linux/fs.h 2006-05-22 09:24:14.000000000 -0700
@@ -1031,8 +1031,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -1624,10 +1622,6 @@ extern long do_splice_direct(struct file

extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
-ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
Index: linux-2.6.17-rc4/mm/filemap.c
===================================================================
--- linux-2.6.17-rc4.orig/mm/filemap.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/mm/filemap.c 2006-05-22 09:24:13.000000000 -0700
@@ -2298,42 +2298,6 @@ ssize_t generic_file_write(struct file *
}
EXPORT_SYMBOL(generic_file_write);

-ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-EXPORT_SYMBOL(generic_file_readv);
-
-ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_writev);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
Index: linux-2.6.17-rc4/net/socket.c
===================================================================
--- linux-2.6.17-rc4.orig/net/socket.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/net/socket.c 2006-05-22 09:23:17.000000000 -0700
@@ -112,10 +112,6 @@ static long compat_sock_ioctl(struct fil
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);

@@ -138,8 +134,6 @@ static struct file_operations socket_fil
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
- .readv = sock_readv,
- .writev = sock_writev,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
@@ -738,23 +732,6 @@ static ssize_t do_sock_read(struct msghd
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}

-static ssize_t sock_readv(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb iocb;
- struct sock_iocb siocb;
- struct msghdr msg;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -797,23 +774,6 @@ static ssize_t do_sock_write(struct msgh
return __sock_sendmsg(iocb, sock, msg, size);
}

-static ssize_t sock_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct msghdr msg;
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
-
- ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
-}
-
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
Index: linux-2.6.17-rc4/sound/core/pcm_native.c
===================================================================
--- linux-2.6.17-rc4.orig/sound/core/pcm_native.c 2006-05-22 09:22:16.000000000 -0700
+++ linux-2.6.17-rc4/sound/core/pcm_native.c 2006-05-22 09:23:17.000000000 -0700
@@ -2819,8 +2819,8 @@ static ssize_t snd_pcm_write(struct file
return result;
}

-static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)

{
struct snd_pcm_file *pcm_file;
@@ -2831,22 +2831,22 @@ static ssize_t snd_pcm_readv(struct file
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, return -ENXIO);
runtime = substream->runtime;
if (runtime->status->state == SNDRV_PCM_STATE_OPEN)
return -EBADFD;
- if (count > 1024 || count != runtime->channels)
+ if (nr_segs > 1024 || nr_segs != runtime->channels)
return -EINVAL;
- if (!frame_aligned(runtime, _vector->iov_len))
+ if (!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_readv(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -2854,8 +2854,8 @@ static ssize_t snd_pcm_readv(struct file
return result;
}

-static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector,
- unsigned long count, loff_t * offset)
+static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct snd_pcm_file *pcm_file;
struct snd_pcm_substream *substream;
@@ -2865,7 +2865,7 @@ static ssize_t snd_pcm_writev(struct fil
void __user **bufs;
snd_pcm_uframes_t frames;

- pcm_file = file->private_data;
+ pcm_file = iocb->ki_filp->private_data;
substream = pcm_file->substream;
snd_assert(substream != NULL, result = -ENXIO; goto end);
runtime = substream->runtime;
@@ -2873,17 +2873,17 @@ static ssize_t snd_pcm_writev(struct fil
result = -EBADFD;
goto end;
}
- if (count > 128 || count != runtime->channels ||
- !frame_aligned(runtime, _vector->iov_len)) {
+ if (nr_segs > 128 || nr_segs != runtime->channels ||
+ !frame_aligned(runtime, iov->iov_len)) {
result = -EINVAL;
goto end;
}
- frames = bytes_to_samples(runtime, _vector->iov_len);
- bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL);
+ frames = bytes_to_samples(runtime, iov->iov_len);
+ bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
- for (i = 0; i < count; ++i)
- bufs[i] = _vector[i].iov_base;
+ for (i = 0; i < nr_segs; ++i)
+ bufs[i] = iov[i].iov_base;
result = snd_pcm_lib_writev(substream, bufs, frames);
if (result > 0)
result = frames_to_bytes(runtime, result);
@@ -3389,7 +3389,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.write = snd_pcm_write,
- .writev = snd_pcm_writev,
+ .aio_write = snd_pcm_aio_write,
.open = snd_pcm_playback_open,
.release = snd_pcm_release,
.poll = snd_pcm_playback_poll,
@@ -3401,7 +3401,7 @@ struct file_operations snd_pcm_f_ops[2]
{
.owner = THIS_MODULE,
.read = snd_pcm_read,
- .readv = snd_pcm_readv,
+ .aio_read = snd_pcm_aio_read,
.open = snd_pcm_capture_open,
.release = snd_pcm_release,
.poll = snd_pcm_capture_poll,




2006-05-22 17:07:14

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Badari Pulavarty <[email protected]> wrote:
>
> On Sun, 2006-05-21 at 18:00 -0700, Andrew Morton wrote:
> > Badari Pulavarty <[email protected]> wrote:
> > >
> > > This patch removes readv() and writev() methods and replaces
> > > them with aio_read()/aio_write() methods.
> >
> > And it breaks autofs4
> >
> > autofs: pipe file descriptor does not contain proper ops
> >
>
> Any easy test case to reproduce the problem ?
>

Grab an FC5 setup, copy RH's .config into your tree.

2006-05-22 17:23:17

by Badari Pulavarty

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Mon, 2006-05-22 at 10:06 -0700, Andrew Morton wrote:
> Badari Pulavarty <[email protected]> wrote:
> >
> > On Sun, 2006-05-21 at 18:00 -0700, Andrew Morton wrote:
> > > Badari Pulavarty <[email protected]> wrote:
> > > >
> > > > This patch removes readv() and writev() methods and replaces
> > > > them with aio_read()/aio_write() methods.
> > >
> > > And it breaks autofs4
> > >
> > > autofs: pipe file descriptor does not contain proper ops
> > >
> >
> > Any easy test case to reproduce the problem ?
> >
>
> Grab an FC5 setup, copy RH's .config into your tree.

Will do.

Like I mentioned, I am travelling this week. I would really
appreciate it if someone could test my updated patch (which I sent
out in my earlier mail).

Thanks,
Badari

2006-05-23 08:29:50

by Ian Kent

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

Badari Pulavarty wrote:
> On Mon, 2006-05-22 at 10:06 -0700, Andrew Morton wrote:
>> Badari Pulavarty <[email protected]> wrote:
>>> On Sun, 2006-05-21 at 18:00 -0700, Andrew Morton wrote:
>>>> Badari Pulavarty <[email protected]> wrote:
>>>>> This patch removes readv() and writev() methods and replaces
>>>>> them with aio_read()/aio_write() methods.
>>>> And it breaks autofs4
>>>>
>>>> autofs: pipe file descriptor does not contain proper ops
>>>>
>>> Any easy test case to reproduce the problem ?
>>>
>> Grab an FC5 setup, copy RH's .config into your tree.
>
> Will do.
>
> Like I mentioned, I am travelling this week. I would really
> appreciate if someone could test my updated patch (I sent out
> in my earlier mail).

Doesn't seem to apply to 2.6.17-rc4.

[raven@raven linux-2.6.16]$ patch -p1 <
~/remove-readv_writev-methods-and-use-aio_read_aio_write.patch
patching file drivers/char/raw.c
Hunk #1 succeeded at 270 (offset 12 lines).
patching file drivers/net/tun.c
patching file fs/bad_inode.c
patching file fs/block_dev.c
Hunk #1 succeeded at 1101 (offset 8 lines).
patching file fs/cifs/cifsfs.c
Hunk #1 FAILED at 484.
1 out of 3 hunks FAILED -- saving rejects to file fs/cifs/cifsfs.c.rej
patching file fs/compat.c
patching file fs/ext2/file.c
patching file fs/ext3/file.c
Hunk #1 succeeded at 111 (offset -1 lines).
patching file fs/fat/file.c
patching file fs/fuse/dev.c
patching file fs/hostfs/hostfs_kern.c
patching file fs/jfs/file.c
patching file fs/ntfs/file.c
Hunk #1 succeeded at 2298 (offset 2 lines).
patching file fs/pipe.c
patching file fs/read_write.c
Hunk #2 succeeded at 439 (offset -12 lines).
Hunk #4 succeeded at 574 (offset -12 lines).
Hunk #6 succeeded at 619 (offset -12 lines).
patching file fs/read_write.h
patching file fs/xfs/linux-2.6/xfs_file.c
Hunk #1 succeeded at 131 with fuzz 1 (offset 2 lines).
Hunk #3 succeeded at 513 (offset 2 lines).
patching file include/linux/fs.h
patching file mm/filemap.c
Hunk #1 succeeded at 2300 (offset 2 lines).
patching file net/socket.c
Hunk #3 FAILED at 732.
Hunk #4 FAILED at 774.
2 out of 4 hunks FAILED -- saving rejects to file net/socket.c.rej
patching file sound/core/pcm_native.c
[raven@raven linux-2.6.16]$

Ian

2006-05-23 15:05:21

by Ian Kent

[permalink] [raw]
Subject: Re: [PATCH 2/4] Remove readv/writev methods and use aio_read/aio_write instead

On Tue, 2006-05-23 at 16:29 +0800, Ian Kent wrote:
> Badari Pulavarty wrote:
> > On Mon, 2006-05-22 at 10:06 -0700, Andrew Morton wrote:
> >> Badari Pulavarty <[email protected]> wrote:
> >>> On Sun, 2006-05-21 at 18:00 -0700, Andrew Morton wrote:
> >>>> Badari Pulavarty <[email protected]> wrote:
> >>>>> This patch removes readv() and writev() methods and replaces
> >>>>> them with aio_read()/aio_write() methods.
> >>>> And it breaks autofs4
> >>>>
> >>>> autofs: pipe file descriptor does not contain proper ops
> >>>>
> >>> Any easy test case to reproduce the problem ?
> >>>
> >> Grab an FC5 setup, copy RH's .config into your tree.
> >
> > Will do.
> >
> > Like I mentioned, I am travelling this week. I would really
> > appreciate if someone could test my updated patch (I sent out
> > in my earlier mail).
>
> Doesn't seem to apply to 2.6.17-rc4.
>
> [raven@raven linux-2.6.16]$ patch -p1 <
> ~/remove-readv_writev-methods-and-use-aio_read_aio_write.patch
> patching file drivers/char/raw.c
> Hunk #1 succeeded at 270 (offset 12 lines).
> patching file drivers/net/tun.c
> patching file fs/bad_inode.c
> patching file fs/block_dev.c
> Hunk #1 succeeded at 1101 (offset 8 lines).
> patching file fs/cifs/cifsfs.c
> Hunk #1 FAILED at 484.
> 1 out of 3 hunks FAILED -- saving rejects to file fs/cifs/cifsfs.c.rej

Function cifs_file_writev appears to be already present.

> patching file fs/compat.c
> patching file fs/ext2/file.c
> patching file fs/ext3/file.c
> Hunk #1 succeeded at 111 (offset -1 lines).
> patching file fs/fat/file.c
> patching file fs/fuse/dev.c
> patching file fs/hostfs/hostfs_kern.c
> patching file fs/jfs/file.c
> patching file fs/ntfs/file.c
> Hunk #1 succeeded at 2298 (offset 2 lines).
> patching file fs/pipe.c
> patching file fs/read_write.c
> Hunk #2 succeeded at 439 (offset -12 lines).
> Hunk #4 succeeded at 574 (offset -12 lines).
> Hunk #6 succeeded at 619 (offset -12 lines).
> patching file fs/read_write.h
> patching file fs/xfs/linux-2.6/xfs_file.c
> Hunk #1 succeeded at 131 with fuzz 1 (offset 2 lines).
> Hunk #3 succeeded at 513 (offset 2 lines).
> patching file include/linux/fs.h
> patching file mm/filemap.c
> Hunk #1 succeeded at 2300 (offset 2 lines).
> patching file net/socket.c
> Hunk #3 FAILED at 732.
> Hunk #4 FAILED at 774.
> 2 out of 4 hunks FAILED -- saving rejects to file net/socket.c.rej

And similarly sock_readv and sock_writev.

> patching file sound/core/pcm_native.c
> [raven@raven linux-2.6.16]$
>

At a glance they look the same as the ones in the patch.

In case it's helpful for comparison here is a patch made after the
above.

--- linux-2.6.16/drivers/char/raw.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:21.000000000 +0800
+++ linux-2.6.16/drivers/char/raw.c 2006-05-23 22:07:29.000000000 +0800
@@ -270,8 +270,6 @@ static struct file_operations raw_fops =
.open = raw_open,
.release= raw_release,
.ioctl = raw_ioctl,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.owner = THIS_MODULE,
};

--- linux-2.6.16/net/socket.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:33.000000000 +0800
+++ linux-2.6.16/net/socket.c 2006-05-23 22:07:29.000000000 +0800
@@ -112,10 +112,6 @@ static long compat_sock_ioctl(struct fil
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_readv(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
-static ssize_t sock_writev(struct file *file, const struct iovec *vector,
- unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);

@@ -138,8 +134,6 @@ static struct file_operations socket_fil
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
- .readv = sock_readv,
- .writev = sock_writev,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
--- linux-2.6.16/mm/filemap.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:32.000000000 +0800
+++ linux-2.6.16/mm/filemap.c 2006-05-23 22:07:29.000000000 +0800
@@ -2300,42 +2300,6 @@ ssize_t generic_file_write(struct file *
}
EXPORT_SYMBOL(generic_file_write);

-ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- return ret;
-}
-EXPORT_SYMBOL(generic_file_readv);
-
-ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos)
-{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
- mutex_unlock(&inode->i_mutex);
-
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- err = sync_page_range(inode, mapping, *ppos - ret, ret);
- if (err < 0)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(generic_file_writev);
-
/*
* Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
* went wrong during pagecache shootdown.
--- linux-2.6.16/fs/xfs/linux-2.6/xfs_file.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:30.000000000 +0800
+++ linux-2.6.16/fs/xfs/linux-2.6/xfs_file.c 2006-05-23 22:07:29.000000000 +0800
@@ -131,94 +131,6 @@ xfs_file_aio_write_invis(
return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}

-STATIC inline ssize_t
-__xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_readv(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_readv_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
-STATIC inline ssize_t
-__xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- int ioflags,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- struct inode *inode = file->f_mapping->host;
- vnode_t *vp = vn_from_inode(inode);
- struct kiocb kiocb;
- ssize_t rval;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
-
- VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
-
- *ppos = kiocb.ki_pos;
- return rval;
-}
-
-STATIC ssize_t
-xfs_file_writev(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, 0, nr_segs, ppos);
-}
-
-STATIC ssize_t
-xfs_file_writev_invis(
- struct file *file,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos)
-{
- return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos);
-}
-
STATIC ssize_t
xfs_file_sendfile(
struct file *filp,
@@ -579,8 +491,6 @@ const struct file_operations xfs_file_op
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv,
- .writev = xfs_file_writev,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.sendfile = xfs_file_sendfile,
@@ -603,8 +513,6 @@ const struct file_operations xfs_invis_f
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .readv = xfs_file_readv_invis,
- .writev = xfs_file_writev_invis,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.sendfile = xfs_file_sendfile_invis,
--- linux-2.6.16/fs/read_write.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:30.000000000 +0800
+++ linux-2.6.16/fs/read_write.c 2006-05-23 22:07:29.000000000 +0800
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -438,6 +439,62 @@ unsigned long iov_shorten(struct iovec *

EXPORT_SYMBOL(iov_shorten);

+ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+{
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = *ppos;
+ kiocb.ki_left = len;
+ kiocb.ki_nbytes = len;
+
+ for (;;) {
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ if (ret != -EIOCBRETRY)
+ break;
+ wait_on_retry_sync_kiocb(&kiocb);
+ }
+
+ if (ret == -EIOCBQUEUED)
+ ret = wait_on_sync_kiocb(&kiocb);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+
+/* Do it by hand, with file-ops */
+ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+{
+ struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+ while (nr_segs > 0) {
+ void __user *base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ nr = fn(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

@@ -445,12 +502,9 @@ static ssize_t do_readv_writev(int type,
const struct iovec __user * uvector,
unsigned long nr_segs, loff_t *pos)
{
- typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
- typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);
-
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *vector;
+ struct iovec *iov = iovstack;
ssize_t ret;
int seg;
io_fn_t fn;
@@ -520,39 +574,18 @@ static ssize_t do_readv_writev(int type,
fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->readv;
+ fnv = file->f_op->aio_read;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->writev;
- }
- if (fnv) {
- ret = fnv(file, iov, nr_segs, pos);
- goto out;
+ fnv = file->f_op->aio_write;
}

- /* Do it by hand, with file-ops */
- ret = 0;
- vector = iov;
- while (nr_segs > 0) {
- void __user * base;
- size_t len;
- ssize_t nr;
-
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(file, base, len, pos);
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

- if (nr < 0) {
- if (!ret) ret = nr;
- break;
- }
- ret += nr;
- if (nr != len)
- break;
- }
out:
if (iov != iovstack)
kfree(iov);
@@ -573,7 +606,7 @@ ssize_t vfs_readv(struct file *file, con
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op || (!file->f_op->readv && !file->f_op->read))
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;

return do_readv_writev(READ, file, vec, vlen, pos);
@@ -586,7 +619,7 @@ ssize_t vfs_writev(struct file *file, co
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op || (!file->f_op->writev && !file->f_op->write))
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;

return do_readv_writev(WRITE, file, vec, vlen, pos);
--- linux-2.6.16/fs/cifs/cifsfs.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:28.000000000 +0800
+++ linux-2.6.16/fs/cifs/cifsfs.c 2006-05-23 22:07:29.000000000 +0800
@@ -581,8 +581,6 @@ struct inode_operations cifs_symlink_ino
const struct file_operations cifs_file_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
@@ -624,8 +622,6 @@ const struct file_operations cifs_file_d
const struct file_operations cifs_file_nobrl_ops = {
.read = do_sync_read,
.write = do_sync_write,
- .readv = generic_file_readv,
- .writev = cifs_file_writev,
.aio_read = generic_file_aio_read,
.aio_write = cifs_file_aio_write,
.open = cifs_open,
--- linux-2.6.16/fs/block_dev.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:28.000000000 +0800
+++ linux-2.6.16/fs/block_dev.c 2006-05-23 22:07:29.000000000 +0800
@@ -1101,8 +1101,6 @@ const struct file_operations def_blk_fop
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .readv = generic_file_readv,
- .writev = generic_file_write_nolock,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
--- linux-2.6.16/fs/ntfs/file.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:29.000000000 +0800
+++ linux-2.6.16/fs/ntfs/file.c 2006-05-23 22:07:29.000000000 +0800
@@ -2298,11 +2298,9 @@ const struct file_operations ntfs_file_o
.llseek = generic_file_llseek, /* Seek inside file. */
.read = generic_file_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
.write = ntfs_file_write, /* Write to file. */
.aio_write = ntfs_file_aio_write, /* Async write to file. */
- .writev = ntfs_file_writev, /* Write to file. */
/*.release = ,*/ /* Last file is closed. See
fs/ext2/file.c::
ext2_release_file() for
--- linux-2.6.16/fs/ext3/file.cremove-readv_writev-methods-and-use-aio_read_aio_write 2006-05-23 22:06:28.000000000 +0800
+++ linux-2.6.16/fs/ext3/file.c 2006-05-23 22:07:29.000000000 +0800
@@ -111,8 +111,6 @@ const struct file_operations ext3_file_o
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext3_file_write,
- .readv = generic_file_readv,
- .writev = generic_file_writev,
.ioctl = ext3_ioctl,
.mmap = generic_file_mmap,
.open = generic_file_open,