2017-05-27 08:17:02

by Christoph Hellwig

[permalink] [raw]
Subject: clean up readv/writev helpers

Hi Al,

this series cleans up how we implement the various vectored read and write
syscalls, as well as the internal iov_iter based APIs. It then switches
nfsd to use vfs_iter_read/write which cleans up nfsd. Note that all
exportable file systems already implement ->read_iter and ->write_iter
anyway.


2017-05-27 08:17:05

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 01/10] fs: pass on flags in compat_writev

Fixes: 793b80ef ("vfs: pass a flags argument to vfs_readv/vfs_writev")
Signed-off-by: Christoph Hellwig <[email protected]>
Cc: [email protected]
---
fs/read_write.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 47c1d4484df9..19d4d88fa285 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1285,7 +1285,7 @@ static size_t compat_writev(struct file *file,
if (!(file->f_mode & FMODE_CAN_WRITE))
goto out;

- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);

out:
if (ret > 0)
--
2.11.0


2017-05-27 08:17:09

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 02/10] fs: remove do_readv_writev

opencode it in both callers to simplify the call stack a bit.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/read_write.c | 43 +++++++++++++++++++++----------------------
1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 19d4d88fa285..94cb71058098 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -951,51 +951,50 @@ static ssize_t __do_readv_writev(int type, struct file *file,
return ret;
}

-static ssize_t do_readv_writev(int type, struct file *file,
- const struct iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos,
- int flags)
+ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
+ unsigned long vlen, loff_t *pos, int flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;

- ret = import_iovec(type, uvector, nr_segs,
- ARRAY_SIZE(iovstack), &iov, &iter);
- if (ret < 0)
- return ret;
-
- ret = __do_readv_writev(type, file, &iter, pos, flags);
- kfree(iov);
-
- return ret;
-}
-
-ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos, int flags)
-{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;

- return do_readv_writev(READ, file, vec, vlen, pos, flags);
-}
+ ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;

+ ret = __do_readv_writev(READ, file, &iter, pos, flags);
+ kfree(iov);
+ return ret;
+}
EXPORT_SYMBOL(vfs_readv);

ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ ssize_t ret;
+
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;

- return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
-}
+ ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;

+ ret = __do_readv_writev(WRITE, file, &iter, pos, flags);
+ kfree(iov);
+ return ret;
+}
EXPORT_SYMBOL(vfs_writev);

static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
--
2.11.0


2017-05-27 08:17:13

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 03/10] fs: remove compat_do_readv_writev

opencode it in both callers to simplify the call stack a bit.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/read_write.c | 42 ++++++++++++++++--------------------------
1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 94cb71058098..5cbdf23d924f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1142,32 +1142,13 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
}

#ifdef CONFIG_COMPAT
-
-static ssize_t compat_do_readv_writev(int type, struct file *file,
- const struct compat_iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos,
- int flags)
-{
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
- ssize_t ret;
-
- ret = compat_import_iovec(type, uvector, nr_segs,
- UIO_FASTIOV, &iov, &iter);
- if (ret < 0)
- return ret;
-
- ret = __do_readv_writev(type, file, &iter, pos, flags);
- kfree(iov);
-
- return ret;
-}
-
static size_t compat_readv(struct file *file,
const struct compat_iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
ssize_t ret = -EBADF;

if (!(file->f_mode & FMODE_READ))
@@ -1177,8 +1158,11 @@ static size_t compat_readv(struct file *file,
if (!(file->f_mode & FMODE_CAN_READ))
goto out;

- ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
-
+ ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
+ if (ret < 0)
+ goto out;
+ ret = __do_readv_writev(READ, file, &iter, pos, flags);
+ kfree(iov);
out:
if (ret > 0)
add_rchar(current, ret);
@@ -1275,6 +1259,9 @@ static size_t compat_writev(struct file *file,
const struct compat_iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
ssize_t ret = -EBADF;

if (!(file->f_mode & FMODE_WRITE))
@@ -1284,8 +1271,11 @@ static size_t compat_writev(struct file *file,
if (!(file->f_mode & FMODE_CAN_WRITE))
goto out;

- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
-
+ ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
+ if (ret < 0)
+ goto out;
+ ret = __do_readv_writev(WRITE, file, &iter, pos, flags);
+ kfree(iov);
out:
if (ret > 0)
add_wchar(current, ret);
--
2.11.0


2017-05-27 08:17:24

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 06/10] fs: set kernel address limit in do_loop_readv_writev

This will allow us to use all iov_iter based read/write for ITER_KVEC
without having to check if the file has the iter version of the
read/write ops, which will allow us to get rid of a large number
of get_fs/set_fs calls in drivers.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/read_write.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index 64b61a032a56..60c64a996ab2 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -704,9 +704,18 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
loff_t *ppos, int type, int flags)
{
ssize_t ret = 0;
+ mm_segment_t uninitialized_var(old_fs);

if (flags & ~RWF_HIPRI)
return -EOPNOTSUPP;
+ if (iter->type & (ITER_BVEC | ITER_PIPE))
+ return -EOPNOTSUPP;
+
+ /* adjust the address limit for in-kernel I/O */
+ if (iter->type & ITER_KVEC) {
+ old_fs = get_fs();
+ set_fs(get_ds());
+ }

while (iov_iter_count(iter)) {
struct iovec iovec = iov_iter_iovec(iter);
@@ -731,6 +740,8 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
iov_iter_advance(iter, nr);
}

+ if (iter->type & ITER_KVEC)
+ set_fs(old_fs);
return ret;
}

--
2.11.0


2017-05-27 08:17:29

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 07/10] fs: implement vfs_iter_read using do_iter_read

De-duplicate some code and allow for passing the flags argument to
vfs_iter_read. Additionally it properly updates atime now.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/block/loop.c | 4 ++--
drivers/target/target_core_file.c | 2 +-
fs/coda/file.c | 2 +-
fs/read_write.c | 29 +++++++++--------------------
include/linux/fs.h | 3 ++-
5 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28d932906f24..1070e851ad9a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -342,7 +342,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,

rq_for_each_segment(bvec, rq, iter) {
iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
- len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
+ len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0)
return len;

@@ -383,7 +383,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
b.bv_len = bvec.bv_len;

iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
- len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
+ len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0) {
ret = len;
goto out_free_page;
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 73b8f93a5fef..2befc0d7fdb1 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -275,7 +275,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
if (is_write)
ret = vfs_iter_write(fd, &iter, &pos);
else
- ret = vfs_iter_read(fd, &iter, &pos);
+ ret = vfs_iter_read(fd, &iter, &pos, 0);

if (is_write) {
if (ret < 0 || ret != data_length) {
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9d956cd6d46f..f1102822bcfd 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -34,7 +34,7 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)

BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);

- return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
+ return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0);
}

static ssize_t
diff --git a/fs/read_write.c b/fs/read_write.c
index 60c64a996ab2..620ac6f9dacb 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -356,26 +356,6 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
}
#endif

-ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- if (!file->f_op->read_iter)
- return -EINVAL;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- iter->type |= READ;
- ret = call_read_iter(file, &kiocb, iter);
- BUG_ON(ret == -EIOCBQUEUED);
- if (ret > 0)
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(vfs_iter_read);
-
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
{
struct kiocb kiocb;
@@ -955,6 +935,15 @@ static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
return ret;
}

+ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ int flags)
+{
+ if (!file->f_op->read_iter)
+ return -EINVAL;
+ return do_iter_read(file, iter, ppos, flags);
+}
+EXPORT_SYMBOL(vfs_iter_read);
+
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
loff_t *pos, int flags)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b2654..3f6a4f4efb32 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2789,7 +2789,8 @@ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);

-ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos);
+ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ int flags);
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);

/* fs/block_dev.c */
--
2.11.0


2017-05-27 08:17:17

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 04/10] fs: remove __do_readv_writev

Split it into one helper each for reads vs writes.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/read_write.c | 60 ++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 5cbdf23d924f..f453c9a485b9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -916,8 +916,8 @@ ssize_t compat_rw_copy_check_uvector(int type,
}
#endif

-static ssize_t __do_readv_writev(int type, struct file *file,
- struct iov_iter *iter, loff_t *pos, int flags)
+static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
+ loff_t *pos, int flags)
{
size_t tot_len;
ssize_t ret = 0;
@@ -925,29 +925,41 @@ static ssize_t __do_readv_writev(int type, struct file *file,
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
- ret = rw_verify_area(type, file, pos, tot_len);
+ ret = rw_verify_area(READ, file, pos, tot_len);
if (ret < 0)
- goto out;
-
- if (type != READ)
- file_start_write(file);
+ return ret;

- if ((type == READ && file->f_op->read_iter) ||
- (type == WRITE && file->f_op->write_iter))
- ret = do_iter_readv_writev(file, iter, pos, type, flags);
+ if (file->f_op->read_iter)
+ ret = do_iter_readv_writev(file, iter, pos, READ, flags);
else
- ret = do_loop_readv_writev(file, iter, pos, type, flags);
+ ret = do_loop_readv_writev(file, iter, pos, READ, flags);
+out:
+ if (ret >= 0)
+ fsnotify_access(file);
+ return ret;
+}

- if (type != READ)
- file_end_write(file);
+static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
+ loff_t *pos, int flags)
+{
+ size_t tot_len;
+ ssize_t ret = 0;

-out:
- if ((ret + (type == READ)) > 0) {
- if (type == READ)
- fsnotify_access(file);
- else
- fsnotify_modify(file);
- }
+ tot_len = iov_iter_count(iter);
+ if (!tot_len)
+ return 0;
+ ret = rw_verify_area(WRITE, file, pos, tot_len);
+ if (ret < 0)
+ return ret;
+
+ file_start_write(file);
+ if (file->f_op->write_iter)
+ ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
+ else
+ ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
+ file_end_write(file);
+ if (ret > 0)
+ fsnotify_modify(file);
return ret;
}

@@ -968,7 +980,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
if (ret < 0)
return ret;

- ret = __do_readv_writev(READ, file, &iter, pos, flags);
+ ret = do_iter_read(file, &iter, pos, flags);
kfree(iov);
return ret;
}
@@ -991,7 +1003,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
if (ret < 0)
return ret;

- ret = __do_readv_writev(WRITE, file, &iter, pos, flags);
+ ret = do_iter_write(file, &iter, pos, flags);
kfree(iov);
return ret;
}
@@ -1161,7 +1173,7 @@ static size_t compat_readv(struct file *file,
ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
if (ret < 0)
goto out;
- ret = __do_readv_writev(READ, file, &iter, pos, flags);
+ ret = do_iter_read(file, &iter, pos, flags);
kfree(iov);
out:
if (ret > 0)
@@ -1274,7 +1286,7 @@ static size_t compat_writev(struct file *file,
ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
if (ret < 0)
goto out;
- ret = __do_readv_writev(WRITE, file, &iter, pos, flags);
+ ret = do_iter_write(file, &iter, pos, flags);
kfree(iov);
out:
if (ret > 0)
--
2.11.0


2017-05-27 08:17:34

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 08/10] fs: implement vfs_iter_write using do_iter_write

De-duplicate some code and allow for passing the flags argument to
vfs_iter_write. Additionally it now properly updates timestamps.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/block/loop.c | 2 +-
drivers/target/target_core_file.c | 4 ++--
fs/coda/file.c | 2 +-
fs/read_write.c | 29 +++++++++--------------------
fs/splice.c | 2 +-
include/linux/fs.h | 3 ++-
6 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1070e851ad9a..1802142cca4b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -266,7 +266,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);

file_start_write(file);
- bw = vfs_iter_write(file, &i, ppos);
+ bw = vfs_iter_write(file, &i, ppos, 0);
file_end_write(file);

if (likely(bw == bvec->bv_len))
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 2befc0d7fdb1..e921948415c7 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -273,7 +273,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,

iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
if (is_write)
- ret = vfs_iter_write(fd, &iter, &pos);
+ ret = vfs_iter_write(fd, &iter, &pos, 0);
else
ret = vfs_iter_read(fd, &iter, &pos, 0);

@@ -409,7 +409,7 @@ fd_execute_write_same(struct se_cmd *cmd)
}

iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
- ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos);
+ ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);

kfree(bvec);
if (ret < 0 || ret != len) {
diff --git a/fs/coda/file.c b/fs/coda/file.c
index f1102822bcfd..363402fcb3ed 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -51,7 +51,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
host_file = cfi->cfi_container;
file_start_write(host_file);
inode_lock(coda_inode);
- ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
+ ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
diff --git a/fs/read_write.c b/fs/read_write.c
index 620ac6f9dacb..f48caaf93216 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -356,26 +356,6 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
}
#endif

-ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- if (!file->f_op->write_iter)
- return -EINVAL;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- iter->type |= WRITE;
- ret = call_write_iter(file, &kiocb, iter);
- BUG_ON(ret == -EIOCBQUEUED);
- if (ret > 0)
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(vfs_iter_write);
-
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
struct inode *inode;
@@ -973,6 +953,15 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
return ret;
}

+ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ int flags)
+{
+ if (!file->f_op->write_iter)
+ return -EINVAL;
+ return do_iter_write(file, iter, ppos, flags);
+}
+EXPORT_SYMBOL(vfs_iter_write);
+
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
diff --git a/fs/splice.c b/fs/splice.c
index 540c4a44756c..ae41201d0325 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -762,7 +762,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,

iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
sd.total_len - left);
- ret = vfs_iter_write(out, &from, &sd.pos);
+ ret = vfs_iter_write(out, &from, &sd.pos, 0);
if (ret <= 0)
break;

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f6a4f4efb32..c67f1f8ee789 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2791,7 +2791,8 @@ extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);

ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
int flags);
-ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);
+ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ int flags);

/* fs/block_dev.c */
extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
--
2.11.0


2017-05-27 08:17:30

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 05/10] fs: move more code into do_iter_read/do_iter_write

The checks for the permissions and can read / write flags are common
for the callers.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/read_write.c | 73 ++++++++++++++++++++++-----------------------------------
1 file changed, 28 insertions(+), 45 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index f453c9a485b9..64b61a032a56 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -922,6 +922,11 @@ static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
size_t tot_len;
ssize_t ret = 0;

+ if (!(file->f_mode & FMODE_READ))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_READ))
+ return -EINVAL;
+
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
@@ -945,6 +950,11 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
size_t tot_len;
ssize_t ret = 0;

+ if (!(file->f_mode & FMODE_WRITE))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return -EINVAL;
+
tot_len = iov_iter_count(iter);
if (!tot_len)
return 0;
@@ -971,17 +981,12 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
struct iov_iter iter;
ssize_t ret;

- if (!(file->f_mode & FMODE_READ))
- return -EBADF;
- if (!(file->f_mode & FMODE_CAN_READ))
- return -EINVAL;
-
ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
- if (ret < 0)
- return ret;
+ if (ret >= 0) {
+ ret = do_iter_read(file, &iter, pos, flags);
+ kfree(iov);
+ }

- ret = do_iter_read(file, &iter, pos, flags);
- kfree(iov);
return ret;
}
EXPORT_SYMBOL(vfs_readv);
@@ -994,17 +999,11 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
struct iov_iter iter;
ssize_t ret;

- if (!(file->f_mode & FMODE_WRITE))
- return -EBADF;
- if (!(file->f_mode & FMODE_CAN_WRITE))
- return -EINVAL;
-
ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
- if (ret < 0)
- return ret;
-
- ret = do_iter_write(file, &iter, pos, flags);
- kfree(iov);
+ if (ret >= 0) {
+ ret = do_iter_write(file, &iter, pos, flags);
+ kfree(iov);
+ }
return ret;
}
EXPORT_SYMBOL(vfs_writev);
@@ -1161,21 +1160,13 @@ static size_t compat_readv(struct file *file,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- ssize_t ret = -EBADF;
-
- if (!(file->f_mode & FMODE_READ))
- goto out;
-
- ret = -EINVAL;
- if (!(file->f_mode & FMODE_CAN_READ))
- goto out;
+ ssize_t ret;

ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
- if (ret < 0)
- goto out;
- ret = do_iter_read(file, &iter, pos, flags);
- kfree(iov);
-out:
+ if (ret >= 0) {
+ ret = do_iter_read(file, &iter, pos, flags);
+ kfree(iov);
+ }
if (ret > 0)
add_rchar(current, ret);
inc_syscr(current);
@@ -1274,21 +1265,13 @@ static size_t compat_writev(struct file *file,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- ssize_t ret = -EBADF;
-
- if (!(file->f_mode & FMODE_WRITE))
- goto out;
-
- ret = -EINVAL;
- if (!(file->f_mode & FMODE_CAN_WRITE))
- goto out;
+ ssize_t ret;

ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
- if (ret < 0)
- goto out;
- ret = do_iter_write(file, &iter, pos, flags);
- kfree(iov);
-out:
+ if (ret >= 0) {
+ ret = do_iter_write(file, &iter, pos, flags);
+ kfree(iov);
+ }
if (ret > 0)
add_wchar(current, ret);
inc_syscw(current);
--
2.11.0


2017-05-27 08:17:43

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 09/10] nfsd: use vfs_iter_read/write

Instead of messing with the address limit to use vfs_readv/vfs_writev.

Note that this requires that exported files implement ->read_iter and
->write_iter. All currently exportable file systems do this.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/vfs.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2be32955d7f2..37a03c456b38 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -911,13 +911,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp,
__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
- mm_segment_t oldfs;
+ struct iov_iter iter;
int host_err;

- oldfs = get_fs();
- set_fs(KERNEL_DS);
- host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0);
- set_fs(oldfs);
+ iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
+ host_err = vfs_iter_read(file, &iter, &offset, 0);
+
return nfsd_finish_read(file, count, host_err);
}

@@ -974,7 +973,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
unsigned long *cnt, int stable)
{
struct svc_export *exp;
- mm_segment_t oldfs;
+ struct iov_iter iter;
__be32 err = 0;
int host_err;
int use_wgather;
@@ -1000,10 +999,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (stable && !use_wgather)
flags |= RWF_SYNC;

- /* Write the data. */
- oldfs = get_fs(); set_fs(KERNEL_DS);
- host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, flags);
- set_fs(oldfs);
+ iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
goto out_nfserr;
*cnt = host_err;
--
2.11.0


2017-05-27 08:17:53

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 10/10] nfsd: remove nfsd_vfs_read

It is simpler to do this in the only caller.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/vfs.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 37a03c456b38..0d19ab1b9404 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -920,16 +920,6 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
return nfsd_finish_read(file, count, host_err);
}

-static __be32
-nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
-{
- if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- return nfsd_splice_read(rqstp, file, offset, count);
- else
- return nfsd_readv(file, offset, vec, vlen, count);
-}
-
/*
* Gathered writes: If another process is currently writing to the file,
* there's a high chance this is another nfsd (triggered by a bulk write
@@ -1041,7 +1031,12 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ra = nfsd_init_raparms(file);

trace_read_opened(rqstp, fhp, offset, vlen);
- err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
+
+ if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
+ err = nfsd_splice_read(rqstp, file, offset, count);
+ else
+ err = nfsd_readv(file, offset, vec, vlen, count);
+
trace_read_io_done(rqstp, fhp, offset, vlen);

if (ra)
--
2.11.0


2017-05-27 08:19:46

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 06/10] fs: set kernel address limit in do_loop_readv_writev

Arg. This patch has been NAKed by Al, but got stuck in the series.
It's not actually needed by the later patches and can be discarded.

On Sat, May 27, 2017 at 11:16:50AM +0300, Christoph Hellwig wrote:
> This will allow to use all iov_iter based read/write for ITER_KVEC
> without having to check if the file has the iter version of the
> read/write ops, which will allow us to get rid of a large number
> of get_fs/set_fs calls in drivers.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/read_write.c | 11 +++++++++++
> 1 file changed, 11 insertions(+)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 64b61a032a56..60c64a996ab2 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -704,9 +704,18 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
> loff_t *ppos, int type, int flags)
> {
> ssize_t ret = 0;
> + mm_segment_t uninitialized_var(old_fs);
>
> if (flags & ~RWF_HIPRI)
> return -EOPNOTSUPP;
> + if (iter->type & (ITER_BVEC | ITER_PIPE))
> + return -EOPNOTSUPP;
> +
> + /* adjust the address limit for in-kernel I/O */
> + if (iter->type & ITER_KVEC) {
> + old_fs = get_fs();
> + set_fs(get_ds());
> + }
>
> while (iov_iter_count(iter)) {
> struct iovec iovec = iov_iter_iovec(iter);
> @@ -731,6 +740,8 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
> iov_iter_advance(iter, nr);
> }
>
> + if (iter->type & ITER_KVEC)
> + set_fs(old_fs);
> return ret;
> }
>
> --
> 2.11.0
---end quoted text---

2017-05-27 19:32:42

by Al Viro

[permalink] [raw]
Subject: Re: clean up readv/writev helpers

On Sat, May 27, 2017 at 11:16:44AM +0300, Christoph Hellwig wrote:
> Hi Al,
>
> this series cleans up how we implement the various vectored read and write
> syscalls, as well as the internal iov_iter based APIs. It then switches
> nfsd to use vfs_iter_read/write which cleans up nfsd. Note that all
> exportable file systems already implement ->read_iter and ->write_iter
> anyway.

1--5 and 7--10 applied.

2017-05-30 18:02:49

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 09/10] nfsd: use vfs_iter_read/write

ACK.

--b.

On Sat, May 27, 2017 at 11:16:53AM +0300, Christoph Hellwig wrote:
> Instead of messing with the address limit to use vfs_read/vfs_writev.
>
> Note that this requires that exported file implement ->read_iter and
> ->write_iter. All currently exportable file systems do this.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/nfsd/vfs.c | 17 +++++++----------
> 1 file changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 2be32955d7f2..37a03c456b38 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -911,13 +911,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp,
> __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
> unsigned long *count)
> {
> - mm_segment_t oldfs;
> + struct iov_iter iter;
> int host_err;
>
> - oldfs = get_fs();
> - set_fs(KERNEL_DS);
> - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0);
> - set_fs(oldfs);
> + iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
> + host_err = vfs_iter_read(file, &iter, &offset, 0);
> +
> return nfsd_finish_read(file, count, host_err);
> }
>
> @@ -974,7 +973,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
> unsigned long *cnt, int stable)
> {
> struct svc_export *exp;
> - mm_segment_t oldfs;
> + struct iov_iter iter;
> __be32 err = 0;
> int host_err;
> int use_wgather;
> @@ -1000,10 +999,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
> if (stable && !use_wgather)
> flags |= RWF_SYNC;
>
> - /* Write the data. */
> - oldfs = get_fs(); set_fs(KERNEL_DS);
> - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, flags);
> - set_fs(oldfs);
> + iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
> + host_err = vfs_iter_write(file, &iter, &pos, flags);
> if (host_err < 0)
> goto out_nfserr;
> *cnt = host_err;
> --
> 2.11.0

2017-05-30 18:03:22

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 10/10] nfsd: remove nfsd_vfs_read

ACK.

--b.

On Sat, May 27, 2017 at 11:16:54AM +0300, Christoph Hellwig wrote:
> Simpler done in the only caller.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/nfsd/vfs.c | 17 ++++++-----------
> 1 file changed, 6 insertions(+), 11 deletions(-)
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 37a03c456b38..0d19ab1b9404 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -920,16 +920,6 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
> return nfsd_finish_read(file, count, host_err);
> }
>
> -static __be32
> -nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
> - loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
> -{
> - if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
> - return nfsd_splice_read(rqstp, file, offset, count);
> - else
> - return nfsd_readv(file, offset, vec, vlen, count);
> -}
> -
> /*
> * Gathered writes: If another process is currently writing to the file,
> * there's a high chance this is another nfsd (triggered by a bulk write
> @@ -1041,7 +1031,12 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> ra = nfsd_init_raparms(file);
>
> trace_read_opened(rqstp, fhp, offset, vlen);
> - err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
> +
> + if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
> + err = nfsd_splice_read(rqstp, file, offset, count);
> + else
> + err = nfsd_readv(file, offset, vec, vlen, count);
> +
> trace_read_io_done(rqstp, fhp, offset, vlen);
>
> if (ra)
> --
> 2.11.0

2017-06-28 14:35:13

by Christoph Hellwig

[permalink] [raw]
Subject: Re: clean up readv/writev helpers

On Sat, May 27, 2017 at 08:32:39PM +0100, Al Viro wrote:
> On Sat, May 27, 2017 at 11:16:44AM +0300, Christoph Hellwig wrote:
> > Hi Al,
> >
> > this series cleans up how we implement the various vectored read and write
> > syscalls, as well as the internal iov_iter based APIs. It then switches
> > nfsd to use vfs_iter_read/write which cleans up nfsd. Note that all
> > exportable file systems already implement ->read_iter and ->write_iter
> > anyway.
>
> 1--5 and 7--10 applied.

Yet these changes don't seem to be in linux-next. Where did you apply
them to?

2017-06-28 16:37:43

by Al Viro

[permalink] [raw]
Subject: Re: clean up readv/writev helpers

On Wed, Jun 28, 2017 at 04:35:11PM +0200, Christoph Hellwig wrote:
> On Sat, May 27, 2017 at 08:32:39PM +0100, Al Viro wrote:
> > On Sat, May 27, 2017 at 11:16:44AM +0300, Christoph Hellwig wrote:
> > > Hi Al,
> > >
> > > this series cleans up how we implement the various vectored read and write
> > > syscalls, as well as the internal iov_iter based APIs. It then switches
> > > nfsd to use vfs_iter_read/write which cleans up nfsd. Note that all
> > > exportable file systems already implement ->read_iter and ->write_iter
> > > anyway.
> >
> > 1--5 and 7--10 applied.
>
> Yet these changes don't seem to be in linux-next. Where did you apply
> them to?

#work.read_write, as part of #work.uaccess. I apologize for being late
with #for-next this cycle (putting it very mildly); hopefully I'll have
something working by tonight. Hell, quite a bit is still a tangled mess
in local queues... ;-/