2020-02-24 08:34:07

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v4 0/3] io_uring: add splice(2) support

*on top of for-5.6 + async patches*

Not the fastest implementation, but I'd need to stir up/duplicate
splice.c bits to do it more efficiently.

note: rebase on top of the recent inflight patchset.

v2:
- u32 len and SQE layout changes (Jens)
- output file is in sqe->fd for automatic hash_reg_file support
- handle unbound_nonreg_file for the second fd
- file leaks fixed with REQ_F_NEED_CLEANUP
- place SPLICE_F_FD_IN_FIXED in splice flags (Jens)
- loff_t* -> loff_t, -1 means not specified offset

v3: [PATCH 3/3] changes
- fd u32 -> s32 (Stefan Metzmacher)
- add BUILD_BUG_SQE_ELEM() (Stefan Metzmacher)
- accept and ignore ioprio (Stefan Metzmacher)
- off_in -> splice_off_in

v4:
- rebase + a bit of function renaming
- make file_get/put accept req instead of ctx (Jens)
- fix lost REQ_F_FIXED_FILE

Pavel Begunkov (3):
splice: make do_splice public
io_uring: add interface for getting files
io_uring: add splice(2) support

fs/io_uring.c | 181 ++++++++++++++++++++++++++++------
fs/splice.c | 6 +-
include/linux/splice.h | 3 +
include/uapi/linux/io_uring.h | 14 ++-
4 files changed, 171 insertions(+), 33 deletions(-)

--
2.24.0


2020-02-24 08:34:09

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v4 1/3] splice: make do_splice public

Make do_splice() public, so other kernel parts can reuse it.

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/splice.c | 6 +++---
include/linux/splice.h | 3 +++
2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index d671936d0aad..4735defc46ee 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1109,9 +1109,9 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
/*
* Determine where to splice to/from.
*/
-static long do_splice(struct file *in, loff_t __user *off_in,
- struct file *out, loff_t __user *off_out,
- size_t len, unsigned int flags)
+long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 74b4911ac16d..ebbbfea48aa0 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -78,6 +78,9 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
+extern long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags);

/*
* for dynamic pipe sizing
--
2.24.0

2020-02-24 08:34:18

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v4 2/3] io_uring: add interface for getting files

Preparation without functional changes. Adds io_file_get(), which allows
grabbing files into places other than req->file.

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 72 ++++++++++++++++++++++++++++++---------------------
1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b149b6e080c5..443870e0dc46 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1258,6 +1258,15 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
return NULL;
}

+static inline void io_put_file(struct io_kiocb *req, struct file *file,
+ bool fixed)
+{
+ if (fixed)
+ percpu_ref_put(&req->ctx->file_data->refs);
+ else
+ fput(file);
+}
+
static void __io_req_do_free(struct io_kiocb *req)
{
if (likely(!io_is_fallback_req(req)))
@@ -1268,18 +1277,12 @@ static void __io_req_do_free(struct io_kiocb *req)

static void __io_req_aux_free(struct io_kiocb *req)
{
- struct io_ring_ctx *ctx = req->ctx;
-
if (req->flags & REQ_F_NEED_CLEANUP)
io_cleanup_req(req);

kfree(req->io);
- if (req->file) {
- if (req->flags & REQ_F_FIXED_FILE)
- percpu_ref_put(&ctx->file_data->refs);
- else
- fput(req->file);
- }
+ if (req->file)
+ io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));

io_req_work_drop_env(req);
}
@@ -1849,7 +1852,7 @@ static void io_file_put(struct io_submit_state *state)
* assuming most submissions are for one file, or at least that each file
* has more than one submission.
*/
-static struct file *io_file_get(struct io_submit_state *state, int fd)
+static struct file *__io_file_get(struct io_submit_state *state, int fd)
{
if (!state)
return fget(fd);
@@ -4567,41 +4570,52 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK];;
}

-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
+ int fd, struct file **out_file, bool fixed)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned flags;
- int fd;
-
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
-
- if (!io_req_needs_file(req, fd))
- return 0;
+ struct file *file;

- if (flags & IOSQE_FIXED_FILE) {
+ if (fixed) {
if (unlikely(!ctx->file_data ||
(unsigned) fd >= ctx->nr_user_files))
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
- req->file = io_file_from_index(ctx, fd);
- if (!req->file)
+ file = io_file_from_index(ctx, fd);
+ if (!file)
return -EBADF;
- req->flags |= REQ_F_FIXED_FILE;
percpu_ref_get(&ctx->file_data->refs);
} else {
- if (req->needs_fixed_file)
- return -EBADF;
trace_io_uring_file_get(ctx, fd);
- req->file = io_file_get(state, fd);
- if (unlikely(!req->file))
+ file = __io_file_get(state, fd);
+ if (unlikely(!file))
return -EBADF;
}

+ *out_file = file;
return 0;
}

+static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ unsigned flags;
+ int fd;
+ bool fixed;
+
+ flags = READ_ONCE(sqe->flags);
+ fd = READ_ONCE(sqe->fd);
+
+ if (!io_req_needs_file(req, fd))
+ return 0;
+
+ fixed = (flags & IOSQE_FIXED_FILE);
+ if (unlikely(!fixed && req->needs_fixed_file))
+ return -EBADF;
+
+ return io_file_get(state, req, fd, &req->file, fixed);
+}
+
static int io_grab_files(struct io_kiocb *req)
{
int ret = -EBADF;
@@ -4846,8 +4860,8 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}

/* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
- IOSQE_ASYNC);
+ req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK |
+ IOSQE_ASYNC | IOSQE_FIXED_FILE);

ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
--
2.24.0

2020-02-24 08:35:03

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v4 3/3] io_uring: add splice(2) support

Add support for splice(2).

- output file is specified as sqe->fd, so it's handled by generic code
- hash_reg_file handled by generic code as well
- len is 32bit, but should be fine
- fd_in is treated as a registered file when SPLICE_F_FD_IN_FIXED is set;
this is a splice flag (i.e. it is passed in sqe->splice_flags).

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 109 ++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 14 ++++-
2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 443870e0dc46..b9dd94143c30 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -76,6 +76,7 @@
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
+#include <linux/splice.h>

#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -433,6 +434,15 @@ struct io_epoll {
struct epoll_event event;
};

+struct io_splice {
+ struct file *file_out;
+ struct file *file_in;
+ loff_t off_out;
+ loff_t off_in;
+ u64 len;
+ unsigned int flags;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -549,6 +559,7 @@ struct io_kiocb {
struct io_fadvise fadvise;
struct io_madvise madvise;
struct io_epoll epoll;
+ struct io_splice splice;
};

struct io_async_ctx *io;
@@ -749,6 +760,11 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.file_table = 1,
},
+ [IORING_OP_SPLICE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ }
};

static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -763,6 +779,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data);
static void io_cleanup_req(struct io_kiocb *req);
+static int io_file_get(struct io_submit_state *state,
+ struct io_kiocb *req,
+ int fd, struct file **out_file,
+ bool fixed);

static struct kmem_cache *req_cachep;

@@ -2404,6 +2424,77 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
return ret;
}

+static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_splice* sp = &req->splice;
+ unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
+ int ret;
+
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
+
+ sp->file_in = NULL;
+ sp->off_in = READ_ONCE(sqe->splice_off_in);
+ sp->off_out = READ_ONCE(sqe->off);
+ sp->len = READ_ONCE(sqe->len);
+ sp->flags = READ_ONCE(sqe->splice_flags);
+
+ if (unlikely(sp->flags & ~valid_flags))
+ return -EINVAL;
+
+ ret = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in), &sp->file_in,
+ (sp->flags & SPLICE_F_FD_IN_FIXED));
+ if (ret)
+ return ret;
+ req->flags |= REQ_F_NEED_CLEANUP;
+
+ if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+
+ return 0;
+}
+
+static bool io_splice_punt(struct file *file)
+{
+ if (get_pipe_info(file))
+ return false;
+ if (!io_file_supports_async(file))
+ return true;
+ return !(file->f_mode & O_NONBLOCK);
+}
+
+static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+ struct io_splice *sp = &req->splice;
+ struct file *in = sp->file_in;
+ struct file *out = sp->file_out;
+ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+ loff_t *poff_in, *poff_out;
+ long ret;
+
+ if (force_nonblock) {
+ if (io_splice_punt(in) || io_splice_punt(out))
+ return -EAGAIN;
+ flags |= SPLICE_F_NONBLOCK;
+ }
+
+ poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
+ poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
+ ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
+ if (force_nonblock && ret == -EAGAIN)
+ return -EAGAIN;
+
+ io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+
+ io_cqring_add_event(req, ret);
+ if (ret != sp->len)
+ req_set_fail_links(req);
+ io_put_req_find_next(req, nxt);
+ return 0;
+}
+
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
@@ -4219,6 +4310,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_EPOLL_CTL:
ret = io_epoll_ctl_prep(req, sqe);
break;
+ case IORING_OP_SPLICE:
+ ret = io_splice_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4281,6 +4375,10 @@ static void io_cleanup_req(struct io_kiocb *req)
case IORING_OP_STATX:
putname(req->open.filename);
break;
+ case IORING_OP_SPLICE:
+ io_put_file(req, req->splice.file_in,
+ (req->splice.flags & SPLICE_F_FD_IN_FIXED));
+ break;
}

req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4484,6 +4582,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_epoll_ctl(req, nxt, force_nonblock);
break;
+ case IORING_OP_SPLICE:
+ if (sqe) {
+ ret = io_splice_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_splice(req, nxt, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
@@ -7225,6 +7331,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(8, __u64, off);
BUILD_BUG_SQE_ELEM(8, __u64, addr2);
BUILD_BUG_SQE_ELEM(16, __u64, addr);
+ BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
BUILD_BUG_SQE_ELEM(24, __u32, len);
BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
@@ -7239,9 +7346,11 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
+ BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
BUILD_BUG_SQE_ELEM(32, __u64, user_data);
BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
+ BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);

BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3f7961c1c243..08891cc1c1e7 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -23,7 +23,10 @@ struct io_uring_sqe {
__u64 off; /* offset into file */
__u64 addr2;
};
- __u64 addr; /* pointer to buffer or iovecs */
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ };
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
@@ -37,6 +40,7 @@ struct io_uring_sqe {
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
+ __u32 splice_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@@ -45,6 +49,7 @@ struct io_uring_sqe {
__u16 buf_index;
/* personality to use, if used */
__u16 personality;
+ __s32 splice_fd_in;
};
__u64 __pad2[3];
};
@@ -113,6 +118,7 @@ enum {
IORING_OP_RECV,
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,

/* this goes last, obviously */
IORING_OP_LAST,
@@ -128,6 +134,12 @@ enum {
*/
#define IORING_TIMEOUT_ABS (1U << 0)

+/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
/*
* IO completion data structure (Completion Queue Entry)
*/
--
2.24.0

2020-02-24 15:36:16

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH v4 0/3] io_uring: add splice(2) support

On 2/24/20 1:32 AM, Pavel Begunkov wrote:
> *on top of for-5.6 + async patches*
>
> Not the fastest implementation, but I'd need to stir up/duplicate
> splice.c bits to do it more efficiently.
>
> note: rebase on top of the recent inflight patchset.

Let's get this queued up, looks good to go to me. Do you have a few
liburing test cases we can add for this?

--
Jens Axboe

2020-02-24 22:34:54

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH v4 0/3] io_uring: add splice(2) support

On 2/24/20 8:35 AM, Jens Axboe wrote:
> On 2/24/20 1:32 AM, Pavel Begunkov wrote:
>> *on top of for-5.6 + async patches*
>>
>> Not the fastest implementation, but I'd need to stir up/duplicate
>> splice.c bits to do it more efficiently.
>>
>> note: rebase on top of the recent inflight patchset.
>
> Let's get this queued up, looks good to go to me. Do you have a few
> liburing test cases we can add for this?

Seems to me like we have an address space issue for the off_in and
off_out parameters. Why aren't we passing in pointers to these
and making them work like regular splice?


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 792ef01a521c..b0cfd68be8c9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -448,8 +448,8 @@ struct io_epoll {
struct io_splice {
struct file *file_out;
struct file *file_in;
- loff_t off_out;
- loff_t off_in;
+ loff_t __user *off_out;
+ loff_t __user *off_in;
u64 len;
unsigned int flags;
};
@@ -2578,8 +2578,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;

sp->file_in = NULL;
- sp->off_in = READ_ONCE(sqe->splice_off_in);
- sp->off_out = READ_ONCE(sqe->off);
+ sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->splice_off_in));
+ sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off));
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);

@@ -2614,7 +2614,6 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
struct file *in = sp->file_in;
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
- loff_t *poff_in, *poff_out;
long ret;

if (force_nonblock) {
@@ -2623,9 +2622,7 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
flags |= SPLICE_F_NONBLOCK;
}

- poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
- poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
- ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
+ ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags);
if (force_nonblock && ret == -EAGAIN)
return -EAGAIN;

--
Jens Axboe

2020-02-24 22:53:10

by Pavel Begunkov

[permalink] [raw]
Subject: Re: [PATCH v4 0/3] io_uring: add splice(2) support

On 25/02/2020 01:34, Jens Axboe wrote:
> On 2/24/20 8:35 AM, Jens Axboe wrote:
>> On 2/24/20 1:32 AM, Pavel Begunkov wrote:
>>> *on top of for-5.6 + async patches*
>>>
>>> Not the fastest implementation, but I'd need to stir up/duplicate
>>> splice.c bits to do it more efficiently.
>>>
>>> note: rebase on top of the recent inflight patchset.
>>
>> Let's get this queued up, looks good to go to me. Do you have a few
>> liburing test cases we can add for this?
>
> Seems to me like we have an address space issue for the off_in and

Is that a problem? From the old thread about fixing loop_rw_iter(), it
appeared to me that it's ok to pass a kernel address as a user one.
The f_op->write of some files is implemented through the same copy_to_user().


> off_out parameters. Why aren't we passing in pointers to these
> and making them work like regular splice?

That's one extra copy_to_user() + copy_from_user(), which I hope to remove
in the future. Also, I'm not really a fan of such an API, and would prefer to
leave such tracking to userspace.

>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 792ef01a521c..b0cfd68be8c9 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -448,8 +448,8 @@ struct io_epoll {
> struct io_splice {
> struct file *file_out;
> struct file *file_in;
> - loff_t off_out;
> - loff_t off_in;
> + loff_t __user *off_out;
> + loff_t __user *off_in;
> u64 len;
> unsigned int flags;
> };
> @@ -2578,8 +2578,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return 0;
>
> sp->file_in = NULL;
> - sp->off_in = READ_ONCE(sqe->splice_off_in);
> - sp->off_out = READ_ONCE(sqe->off);
> + sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->splice_off_in));
> + sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off));
> sp->len = READ_ONCE(sqe->len);
> sp->flags = READ_ONCE(sqe->splice_flags);
>
> @@ -2614,7 +2614,6 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
> struct file *in = sp->file_in;
> struct file *out = sp->file_out;
> unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
> - loff_t *poff_in, *poff_out;
> long ret;
>
> if (force_nonblock) {
> @@ -2623,9 +2622,7 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
> flags |= SPLICE_F_NONBLOCK;
> }
>
> - poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
> - poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
> - ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
> + ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags);
> if (force_nonblock && ret == -EAGAIN)
> return -EAGAIN;
>
>

--
Pavel Begunkov


Attachments:
signature.asc (849.00 B)
OpenPGP digital signature

2020-02-24 22:55:30

by Pavel Begunkov

[permalink] [raw]
Subject: Re: [PATCH v4 0/3] io_uring: add splice(2) support

On 25/02/2020 01:51, Pavel Begunkov wrote:
> On 25/02/2020 01:34, Jens Axboe wrote:
>> On 2/24/20 8:35 AM, Jens Axboe wrote:
>>> On 2/24/20 1:32 AM, Pavel Begunkov wrote:
>>>> *on top of for-5.6 + async patches*
>>>>
>>>> Not the fastest implementation, but I'd need to stir up/duplicate
>>>> splice.c bits to do it more efficiently.
>>>>
>>>> note: rebase on top of the recent inflight patchset.
>>>
>>> Let's get this queued up, looks good to go to me. Do you have a few
>>> liburing test cases we can add for this?
>>
>> Seems to me like we have an address space issue for the off_in and
>
> Is that a problem? From the old fixing thread loop_rw_iter() it appeared
> to me, that it's ok to pass a kernel address as a user one.
> f_op->write of some implemented through the same copy_to_user().

Either that, or I finally need to check for myself how the protection is implemented...

>
>> off_out parameters. Why aren't we passing in pointers to these
>> and making them work like regular splice?
>
> That's one extra copy_to_user() + copy_from_user(), which I hope to remove
> in the future. And I'm not really a fan of such API, and would prefer to give
> away such tracking to the userspace.
>
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 792ef01a521c..b0cfd68be8c9 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -448,8 +448,8 @@ struct io_epoll {
>> struct io_splice {
>> struct file *file_out;
>> struct file *file_in;
>> - loff_t off_out;
>> - loff_t off_in;
>> + loff_t __user *off_out;
>> + loff_t __user *off_in;
>> u64 len;
>> unsigned int flags;
>> };
>> @@ -2578,8 +2578,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> return 0;
>>
>> sp->file_in = NULL;
>> - sp->off_in = READ_ONCE(sqe->splice_off_in);
>> - sp->off_out = READ_ONCE(sqe->off);
>> + sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->splice_off_in));
>> + sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off));
>> sp->len = READ_ONCE(sqe->len);
>> sp->flags = READ_ONCE(sqe->splice_flags);
>>
>> @@ -2614,7 +2614,6 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
>> struct file *in = sp->file_in;
>> struct file *out = sp->file_out;
>> unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
>> - loff_t *poff_in, *poff_out;
>> long ret;
>>
>> if (force_nonblock) {
>> @@ -2623,9 +2622,7 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
>> flags |= SPLICE_F_NONBLOCK;
>> }
>>
>> - poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
>> - poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
>> - ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
>> + ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags);
>> if (force_nonblock && ret == -EAGAIN)
>> return -EAGAIN;
>>
>>
>

--
Pavel Begunkov


Attachments:
signature.asc (849.00 B)
OpenPGP digital signature