2020-04-08 06:02:58

by Pavel Begunkov

Subject: [PATCH 0/4] clean early submission path

This is mainly in preparation for future changes, but looks nice
by itself. The last patch fixes a mild vulnerability.

Pavel Begunkov (4):
io_uring: simplify io_get_sqring
io_uring: alloc req only after getting sqe
io_uring: remove req init from io_get_req()
io_uring: don't read user-shared sqe flags twice

fs/io_uring.c | 113 ++++++++++++++++++++++++--------------------------
1 file changed, 54 insertions(+), 59 deletions(-)

--
2.24.0


2020-04-08 06:13:21

by Pavel Begunkov

Subject: [PATCH 1/4] io_uring: simplify io_get_sqring

Make io_get_sqring() care only about sqes themselves, not about
initialising io_kiocb. Also, split it into get + consume; that will be
helpful in the future.
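
The split is easier to see stripped of the kernel details. Below is a
minimal userspace sketch of the same get/consume pattern (hypothetical
names and simplified types; the real code adds the sq_array indirection,
READ_ONCE() and the dropped-entry accounting):

	struct ring {
		const unsigned *entries;	/* shared entry array */
		unsigned mask;		/* size - 1, size a power of two */
		unsigned cached_head;	/* consumer-private head index */
	};

	/* Peek at the entry the cached head points at; don't advance. */
	static const unsigned *ring_get(struct ring *r)
	{
		return &r->entries[r->cached_head & r->mask];
	}

	/* Advance the head once the caller has read the entry out. */
	static void ring_consume(struct ring *r)
	{
		r->cached_head++;
	}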

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 40 ++++++++++++++++++++++------------------
1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 773f55c49cd8..fa6b7bb62616 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5778,8 +5778,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
* used, it's important that those reads are done through READ_ONCE() to
* prevent a re-load down the line.
*/
-static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe **sqe_ptr)
+static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
{
u32 *sq_array = ctx->sq_array;
unsigned head;
@@ -5793,25 +5792,18 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
* though the application is the one updating it.
*/
head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]);
- if (likely(head < ctx->sq_entries)) {
- /*
- * All io need record the previous position, if LINK vs DRAIN,
- * it can be used to mark the position of the first IO in the
- * link list.
- */
- req->sequence = ctx->cached_sq_head;
- *sqe_ptr = &ctx->sq_sqes[head];
- req->opcode = READ_ONCE((*sqe_ptr)->opcode);
- req->user_data = READ_ONCE((*sqe_ptr)->user_data);
- ctx->cached_sq_head++;
- return true;
- }
+ if (likely(head < ctx->sq_entries))
+ return &ctx->sq_sqes[head];

/* drop invalid entries */
- ctx->cached_sq_head++;
ctx->cached_sq_dropped++;
WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped);
- return false;
+ return NULL;
+}
+
+static inline void io_consume_sqe(struct io_ring_ctx *ctx)
+{
+ ctx->cached_sq_head++;
}

static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
@@ -5855,11 +5847,23 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
submitted = -EAGAIN;
break;
}
- if (!io_get_sqring(ctx, req, &sqe)) {
+ sqe = io_get_sqe(ctx);
+ if (!sqe) {
__io_req_do_free(req);
+ io_consume_sqe(ctx);
break;
}

+ /*
+ * All io need record the previous position, if LINK vs DRAIN,
+ * it can be used to mark the position of the first IO in the
+ * link list.
+ */
+ req->sequence = ctx->cached_sq_head;
+ req->opcode = READ_ONCE(sqe->opcode);
+ req->user_data = READ_ONCE(sqe->user_data);
+ io_consume_sqe(ctx);
+
/* will complete beyond this point, count as submitted */
submitted++;

--
2.24.0

2020-04-08 06:24:38

by Pavel Begunkov

Subject: [PATCH 4/4] io_uring: don't read user-shared sqe flags twice

Don't re-read userspace-shared sqe->flags: the sqe lives in memory
userspace can write concurrently, so a value that was validated once may
differ when read again, and that can be exploited. sqe->flags are copied
into req->flags in io_submit_sqe(); check those instead.
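
The underlying problem is a classic double fetch. A compilable userspace
sketch of the pattern (hypothetical names; an atomic load stands in for
reading the shared mapping):

	#include <stdatomic.h>

	struct fake_sqe { _Atomic unsigned flags; };	/* stand-in for a user-mapped sqe */

	#define FIXED_FILE 0x1u

	/* Racy: two fetches. Another thread can set FIXED_FILE between
	 * them, so the check passes on one value and the caller then
	 * acts on a different one. */
	static unsigned racy_flags(struct fake_sqe *shared)
	{
		if (atomic_load(&shared->flags) & FIXED_FILE)	/* fetch #1: check */
			return 0;
		return atomic_load(&shared->flags);		/* fetch #2: use */
	}

	/* Safe: snapshot once into private memory, check and use the copy. */
	static unsigned safe_flags(struct fake_sqe *shared)
	{
		unsigned flags = atomic_load(&shared->flags);	/* single fetch */
		if (flags & FIXED_FILE)
			return 0;
		return flags;
	}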

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 072e002f1184..f8173a77434c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2930,7 +2930,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
- if (sqe->flags & IOSQE_FIXED_FILE)
+ if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
@@ -2961,7 +2961,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
- if (sqe->flags & IOSQE_FIXED_FILE)
+ if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
@@ -3315,7 +3315,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
- if (sqe->flags & IOSQE_FIXED_FILE)
+ if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
@@ -3392,7 +3392,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
sqe->rw_flags || sqe->buf_index)
return -EINVAL;
- if (sqe->flags & IOSQE_FIXED_FILE)
+ if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;

req->close.fd = READ_ONCE(sqe->fd);
@@ -5363,15 +5363,10 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
}

static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ int fd, unsigned int flags)
{
- unsigned flags;
- int fd;
bool fixed;

- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
-
if (!io_req_needs_file(req, fd))
return 0;

@@ -5613,7 +5608,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
{
struct io_ring_ctx *ctx = req->ctx;
unsigned int sqe_flags;
- int ret, id;
+ int ret, id, fd;

sqe_flags = READ_ONCE(sqe->flags);

@@ -5644,7 +5639,8 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
IOSQE_ASYNC | IOSQE_FIXED_FILE |
IOSQE_BUFFER_SELECT);

- ret = io_req_set_file(state, req, sqe);
+ fd = READ_ONCE(sqe->fd);
+ ret = io_req_set_file(state, req, fd, sqe_flags);
if (unlikely(ret)) {
err_req:
io_cqring_add_event(req, ret);
--
2.24.0

2020-04-08 06:24:38

by Pavel Begunkov

Subject: [PATCH 3/4] io_uring: remove req init from io_get_req()

io_get_req() does two different things: io_kiocb allocation and
initialisation. Move the init part out of it and rename it to
io_alloc_req(). It's simpler this way and also has better data locality.
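
Separated out, the shape is simple. A userspace sketch of the split
(hypothetical names; the kernel version also pulls fields from the sqe
and sets up the work item):

	#include <stdlib.h>

	struct req { void *io; void *file; unsigned flags; int result; };

	static struct req fallback_req;	/* stand-in for the emergency req */

	/* Allocation only hands back memory; no field is touched here. */
	static struct req *req_alloc(void)
	{
		struct req *r = malloc(sizeof(*r));
		return r ? r : &fallback_req;
	}

	/* Initialisation writes every field in one straight pass, so the
	 * stores stay together and both alloc paths share the same init. */
	static void req_init(struct req *r)
	{
		r->io = NULL;
		r->file = NULL;
		r->flags = 0;
		r->result = 0;
	}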

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 53 ++++++++++++++++++++++++++-------------------------
1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9c3e920e789f..072e002f1184 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1293,8 +1293,8 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
return NULL;
}

-static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
- struct io_submit_state *state)
+static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx,
+ struct io_submit_state *state)
{
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct io_kiocb *req;
@@ -1327,22 +1327,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
req = state->reqs[state->free_reqs];
}

-got_it:
- req->io = NULL;
- req->file = NULL;
- req->ctx = ctx;
- req->flags = 0;
- /* one is dropped after submission, the other at completion */
- refcount_set(&req->refs, 2);
- req->task = NULL;
- req->result = 0;
- INIT_IO_WORK(&req->work, io_wq_submit_work);
return req;
fallback:
- req = io_get_fallback_req(ctx);
- if (req)
- goto got_it;
- return NULL;
+ return io_get_fallback_req(ctx);
}

static inline void io_put_file(struct io_kiocb *req, struct file *file,
@@ -5801,6 +5788,28 @@ static inline void io_consume_sqe(struct io_ring_ctx *ctx)
ctx->cached_sq_head++;
}

+static void io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ /*
+ * All io need record the previous position, if LINK vs DRAIN,
+ * it can be used to mark the position of the first IO in the
+ * link list.
+ */
+ req->sequence = ctx->cached_sq_head;
+ req->opcode = READ_ONCE(sqe->opcode);
+ req->user_data = READ_ONCE(sqe->user_data);
+ req->io = NULL;
+ req->file = NULL;
+ req->ctx = ctx;
+ req->flags = 0;
+ /* one is dropped after submission, the other at completion */
+ refcount_set(&req->refs, 2);
+ req->task = NULL;
+ req->result = 0;
+ INIT_IO_WORK(&req->work, io_wq_submit_work);
+}
+
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
struct file *ring_file, int ring_fd,
struct mm_struct **mm, bool async)
@@ -5841,23 +5850,15 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
io_consume_sqe(ctx);
break;
}
- req = io_get_req(ctx, statep);
+ req = io_alloc_req(ctx, statep);
if (unlikely(!req)) {
if (!submitted)
submitted = -EAGAIN;
break;
}

- /*
- * All io need record the previous position, if LINK vs DRAIN,
- * it can be used to mark the position of the first IO in the
- * link list.
- */
- req->sequence = ctx->cached_sq_head;
- req->opcode = READ_ONCE(sqe->opcode);
- req->user_data = READ_ONCE(sqe->user_data);
+ io_init_req(ctx, req, sqe);
io_consume_sqe(ctx);
-
/* will complete beyond this point, count as submitted */
submitted++;

--
2.24.0

2020-04-08 15:32:38

by Jens Axboe

Subject: Re: [PATCH 0/4] clean early submission path

On 4/7/20 10:58 PM, Pavel Begunkov wrote:
> This is mainly in preparation for future changes, but looks nice
> by itself. The last patch fixes a mild vulnerability.
>
> Pavel Begunkov (4):
> io_uring: simplify io_get_sqring
> io_uring: alloc req only after getting sqe
> io_uring: remove req init from io_get_req()
> io_uring: don't read user-shared sqe flags twice
>
> fs/io_uring.c | 113 ++++++++++++++++++++++++--------------------------
> 1 file changed, 54 insertions(+), 59 deletions(-)

Looks good to me, applied. Thanks!

--
Jens Axboe