2020-01-17 22:24:26

by Pavel Begunkov

Subject: [PATCH 1/2] io_uring: remove REQ_F_IO_DRAINED

A request can get into the defer list only once, so there is no need to
mark it as drained; remove the flag. It was probably left over after
extracting __need_defer() for use in timeouts.

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ee01c7422cb..163707ac9e76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -499,7 +499,6 @@ struct io_kiocb {
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_LINK_NEXT 8 /* already grabbed next link */
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
-#define REQ_F_IO_DRAINED 32 /* drain done */
#define REQ_F_LINK 64 /* linked sqes */
#define REQ_F_LINK_TIMEOUT 128 /* has linked timeout */
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
@@ -815,7 +814,7 @@ static inline bool __req_need_defer(struct io_kiocb *req)

static inline bool req_need_defer(struct io_kiocb *req)
{
- if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) == REQ_F_IO_DRAIN)
+ if (unlikely(req->flags & REQ_F_IO_DRAIN))
return __req_need_defer(req);

return false;
@@ -937,10 +936,8 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)

__io_commit_cqring(ctx);

- while ((req = io_get_deferred_req(ctx)) != NULL) {
- req->flags |= REQ_F_IO_DRAINED;
+ while ((req = io_get_deferred_req(ctx)) != NULL)
io_queue_async_work(req);
- }
}

static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
--
2.24.0


2020-01-17 22:24:28

by Pavel Begunkov

Subject: [PATCH 2/2] io_uring: optimise sqe-to-req flags translation

For each IOSQE_* flag there is a corresponding REQ_F_* flag, and there is a
repetitive pattern for translating between them:
e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*

Use the same numeric value/bit for each pair, so the translation can be
optimised into a check-less copy.

Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 58 +++++++++++++++++++++++++--------------------------
1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 163707ac9e76..859243ae74eb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -452,6 +452,28 @@ struct io_async_ctx {
};
};

+enum {
+ /* correspond one-to-one to IOSQE_IO_* flags*/
+ REQ_F_FIXED_FILE = IOSQE_FIXED_FILE, /* ctx owns file */
+ REQ_F_IO_DRAIN = IOSQE_IO_DRAIN, /* drain existing IO first */
+ REQ_F_LINK = IOSQE_IO_LINK, /* linked sqes */
+ REQ_F_HARDLINK = IOSQE_IO_HARDLINK, /* doesn't sever on completion < 0 */
+ REQ_F_FORCE_ASYNC = IOSQE_ASYNC, /* IOSQE_ASYNC */
+
+ REQ_F_LINK_NEXT = 1 << 5, /* already grabbed next link */
+ REQ_F_FAIL_LINK = 1 << 6, /* fail rest of links */
+ REQ_F_INFLIGHT = 1 << 7, /* on inflight list */
+ REQ_F_CUR_POS = 1 << 8, /* read/write uses file position */
+ REQ_F_NOWAIT = 1 << 9, /* must not punt to workers */
+ REQ_F_IOPOLL_COMPLETED = 1 << 10, /* polled IO has completed */
+ REQ_F_LINK_TIMEOUT = 1 << 11, /* has linked timeout */
+ REQ_F_TIMEOUT = 1 << 12, /* timeout request */
+ REQ_F_ISREG = 1 << 13, /* regular file */
+ REQ_F_MUST_PUNT = 1 << 14, /* must be punted even for NONBLOCK */
+ REQ_F_TIMEOUT_NOSEQ = 1 << 15, /* no timeout sequence */
+ REQ_F_COMP_LOCKED = 1 << 16, /* completion under lock */
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -494,23 +516,6 @@ struct io_kiocb {
struct list_head link_list;
unsigned int flags;
refcount_t refs;
-#define REQ_F_NOWAIT 1 /* must not punt to workers */
-#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
-#define REQ_F_FIXED_FILE 4 /* ctx owns file */
-#define REQ_F_LINK_NEXT 8 /* already grabbed next link */
-#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
-#define REQ_F_LINK 64 /* linked sqes */
-#define REQ_F_LINK_TIMEOUT 128 /* has linked timeout */
-#define REQ_F_FAIL_LINK 256 /* fail rest of links */
-#define REQ_F_TIMEOUT 1024 /* timeout request */
-#define REQ_F_ISREG 2048 /* regular file */
-#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
-#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
-#define REQ_F_INFLIGHT 16384 /* on inflight list */
-#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
-#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
-#define REQ_F_FORCE_ASYNC 131072 /* IOSQE_ASYNC */
-#define REQ_F_CUR_POS 262144 /* read/write uses file position */
u64 user_data;
u32 result;
u32 sequence;
@@ -4342,9 +4347,6 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
flags = READ_ONCE(sqe->flags);
fd = READ_ONCE(sqe->fd);

- if (flags & IOSQE_IO_DRAIN)
- req->flags |= REQ_F_IO_DRAIN;
-
if (!io_req_needs_file(req, fd))
return 0;

@@ -4566,6 +4568,12 @@ static inline void io_queue_link_head(struct io_kiocb *req)
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
IOSQE_IO_HARDLINK | IOSQE_ASYNC)

+/*
+ * This should be equal to bitmask of corresponding REQ_F_* flags,
+ * i.e. REQ_F_IO_DRAIN|REQ_F_HARDLINK|REQ_F_FORCE_ASYNC
+ */
+#define SQE_INHERITED_FLAGS (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|IOSQE_ASYNC)
+
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_submit_state *state, struct io_kiocb **link)
{
@@ -4580,8 +4588,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
ret = -EINVAL;
goto err_req;
}
- if (sqe_flags & IOSQE_ASYNC)
- req->flags |= REQ_F_FORCE_ASYNC;
+ req->flags |= sqe_flags & SQE_INHERITED_FLAGS;

ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
@@ -4605,10 +4612,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
head->flags |= REQ_F_IO_DRAIN;
ctx->drain_next = 1;
}
-
- if (sqe_flags & IOSQE_IO_HARDLINK)
- req->flags |= REQ_F_HARDLINK;
-
if (io_alloc_async_ctx(req)) {
ret = -EAGAIN;
goto err_req;
@@ -4635,9 +4638,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
- if (sqe_flags & IOSQE_IO_HARDLINK)
- req->flags |= REQ_F_HARDLINK;
-
INIT_LIST_HEAD(&req->link_list);
ret = io_req_defer_prep(req, sqe);
if (ret)
--
2.24.0

2020-01-17 22:43:30

by Pavel Begunkov

Subject: [PATCH 0/2] optimise sqe-to-req flags

*lost the cover-letter, but here we go*

The main idea is to optimise code like the following by directly
copying sqe flags:

if (sqe_flags & IOSQE_IO_HARDLINK)
req->flags |= REQ_F_HARDLINK;

The first patch is a minor cleanup, and the second one does the
trick. No functional changes.
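
As a standalone sketch of the idea (hypothetical flag names, not the
kernel code), once the user-visible and internal flags share bit values,
the per-flag branches collapse into a single masked copy:

	#include <stdio.h>

	/* hypothetical user-visible flags */
	enum { SQE_FLAG_A = 1U << 0, SQE_FLAG_B = 1U << 1, SQE_FLAG_C = 1U << 2 };

	/* hypothetical internal flags, deliberately sharing the same bit values */
	enum { REQ_FLAG_A = SQE_FLAG_A, REQ_FLAG_B = SQE_FLAG_B, REQ_FLAG_C = SQE_FLAG_C };

	/* mask of flags that may be copied straight through */
	#define SQE_INHERITED (SQE_FLAG_A | SQE_FLAG_B | SQE_FLAG_C)

	int main(void)
	{
		unsigned int sqe_flags = SQE_FLAG_A | SQE_FLAG_C;
		unsigned int req_flags = 0;

		/* old: if (sqe_flags & SQE_FLAG_A) req_flags |= REQ_FLAG_A; ...and so on */
		/* new: check-less copy, valid only because the bits coincide */
		req_flags |= sqe_flags & SQE_INHERITED;

		printf("req_flags = 0x%x\n", req_flags);	/* prints 0x5 */
		return 0;
	}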

The other thing to consider is whether to use such flags as
REQ_F_LINK = IOSQE_IO_LINK, or directly use IOSQE_IO_LINK instead.

Pavel Begunkov (2):
io_uring: remove REQ_F_IO_DRAINED
io_uring: optimise sqe-to-req flags translation

fs/io_uring.c | 65 ++++++++++++++++++++++++---------------------------
1 file changed, 31 insertions(+), 34 deletions(-)

--
2.24.0

2020-01-17 22:52:01

by Jens Axboe

Subject: Re: [PATCH 0/2] optimise sqe-to-req flags

On 1/17/20 3:41 PM, Pavel Begunkov wrote:
> *lost the cover-letter, but here we go*
>
> The main idea is to optimise code like the following by directly
> copying sqe flags:
>
> if (sqe_flags & IOSQE_IO_HARDLINK)
> req->flags |= REQ_F_HARDLINK;
>
> The first patch is a minor cleanup, and the second one does the
> trick. No functional changes.
>
> The other thing to consider is whether to use such flags as
> REQ_F_LINK = IOSQE_IO_LINK, or directly use IOSQE_IO_LINK instead.

I think we should keep the names separate. I think it looks fine, though
I do wish that we could just have both in an enum and not have to do
weird naming. We sometimes do:

enum {
__REQ_F_FOO
};

#define REQ_F_FOO (1U << __REQ_F_FOO)

and with that we could have things Just Work in terms of numbering, if
we keep the overlapped values at the start. Would need IOSQE_* to also
use an enum, ala:

enum {
__IOSQE_FIXED_FILE,
__IOSQE_IO_DRAIN,
...
};

and then do

#define IOSQE_FIXED_FILE (1U << __IOSQE_FIXED_FILE)

and have the __REQ enum start with

enum {
__REQ_F_FIXED_FILE = __IOSQE_FIXED_FILE,
__REQ_F_IO_DRAIN = __IOSQE_IO_DRAIN,
...
__REQ_F_LINK_NEXT,
__REQ_F_FAIL_LINK,
...
};
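
Spelled out, that would give something like this condensed, self-checking
sketch (illustrative only, not an actual patch):

	enum { __IOSQE_FIXED_FILE, __IOSQE_IO_DRAIN };

	#define IOSQE_FIXED_FILE	(1U << __IOSQE_FIXED_FILE)
	#define IOSQE_IO_DRAIN		(1U << __IOSQE_IO_DRAIN)

	enum {
		__REQ_F_FIXED_FILE	= __IOSQE_FIXED_FILE,
		__REQ_F_IO_DRAIN	= __IOSQE_IO_DRAIN,
		/* internal-only flags continue the numbering automatically */
		__REQ_F_LINK_NEXT,
		__REQ_F_FAIL_LINK,
	};

	#define REQ_F_FIXED_FILE	(1U << __REQ_F_FIXED_FILE)
	#define REQ_F_IO_DRAIN		(1U << __REQ_F_IO_DRAIN)

	/* the overlapped flags end up with identical values */
	_Static_assert(REQ_F_FIXED_FILE == IOSQE_FIXED_FILE, "bits must line up");
	_Static_assert(REQ_F_IO_DRAIN == IOSQE_IO_DRAIN, "bits must line up");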

--
Jens Axboe

2020-01-17 22:52:01

by Jens Axboe

Subject: Re: [PATCH 1/2] io_uring: remove REQ_F_IO_DRAINED

On 1/17/20 3:22 PM, Pavel Begunkov wrote:
> A request can get into the defer list only once, so there is no need to
> mark it as drained; remove the flag. It was probably left over after
> extracting __need_defer() for use in timeouts.

Applied - waiting with 2/2 until we're done discussing the approach
there.

--
Jens Axboe

2020-01-17 23:16:47

by Pavel Begunkov

Subject: Re: [PATCH 0/2] optimise sqe-to-req flags

On 18/01/2020 01:49, Jens Axboe wrote:
> On 1/17/20 3:41 PM, Pavel Begunkov wrote:
>> *lost the cover-letter, but here we go*
>>
>> The main idea is to optimise code like the following by directly
>> copying sqe flags:
>>
>> if (sqe_flags & IOSQE_IO_HARDLINK)
>> req->flags |= REQ_F_HARDLINK;
>>
>> The first patch is a minor cleanup, and the second one does the
>> trick. No functional changes.
>>
>> The other thing to consider is whether to use such flags as
>> REQ_F_LINK = IOSQE_IO_LINK, or directly use IOSQE_IO_LINK instead.
>
> I think we should keep the names separate. I think it looks fine, though
> I do wish that we could just have both in an enum and not have to do
> weird naming. We sometimes do:
>
> enum {
> __REQ_F_FOO
> };
>
> #define REQ_F_FOO (1U << __REQ_F_FOO)
>

I thought it would be too bulky, as it needs retyping the same name many times.
Though it solves the numbering problem and is indeed less error-prone. Let me
play with it a bit.

BTW, there is another issue from a development perspective -- it's harder to
find where a flag came from. E.g. searching for REQ_F_FOO won't show you the
place where it was set. SQE_INHERITED_FLAGS is placed close to its usage in
the code exactly for this reason.


> and with that we could have things Just Work in terms of numbering, if
> we keep the overlapped values at the start. Would need IOSQE_* to also
> use an enum, ala:
>
> enum {
> __IOSQE_FIXED_FILE,
> __IOSQE_IO_DRAIN,
> ...
> };
>

I tried not to modify the userspace header. Wouldn't it be better to add a
_BIT postfix instead of the underscores?

> and then do
>
> #define IOSQE_FIXED_FILE (1U << __IOSQE_FIXED_FILE)
>
> and have the __REQ enum start with
>
> enum {
> __REQ_F_FIXED_FILE = __IOSQE_FIXED_FILE,
> __REQ_F_IO_DRAIN = __IOSQE_IO_DRAIN,
> ...
> __REQ_F_LINK_NEXT,
> __REQ_F_FAIL_LINK,
> ...
> };
>

--
Pavel Begunkov



2020-01-18 00:20:11

by Jens Axboe

Subject: Re: [PATCH 0/2] optimise sqe-to-req flags

On 1/17/20 4:14 PM, Pavel Begunkov wrote:
> On 18/01/2020 01:49, Jens Axboe wrote:
>> On 1/17/20 3:41 PM, Pavel Begunkov wrote:
>>> *lost the cover-letter, but here we go*
>>>
>>> The main idea is to optimise code like the following by directly
>>> copying sqe flags:
>>>
>>> if (sqe_flags & IOSQE_IO_HARDLINK)
>>> req->flags |= REQ_F_HARDLINK;
>>>
>>> The first patch is a minor cleanup, and the second one does the
>>> trick. No functional changes.
>>>
>>> The other thing to consider is whether to use such flags as
>>> REQ_F_LINK = IOSQE_IO_LINK, or directly use IOSQE_IO_LINK instead.
>>
>> I think we should keep the names separate. I think it looks fine, though
>> I do wish that we could just have both in an enum and not have to do
>> weird naming. We sometimes do:
>>
>> enum {
>> __REQ_F_FOO
>> };
>>
>> #define REQ_F_FOO (1U << __REQ_F_FOO)
>>
>
> I thought it would be too bulky, as it needs retyping the same name many
> times. Though it solves the numbering problem and is indeed less
> error-prone. Let me play with it a bit.

It's less error prone once the change is done, though the change will be
bigger. I think that's the right tradeoff.

> BTW, there is another issue from a development perspective -- it's
> harder to find where a flag came from. E.g. searching for REQ_F_FOO
> won't show you the place where it was set. SQE_INHERITED_FLAGS is
> placed close to its usage in the code exactly for this reason.

Since it's just that one spot, add a comment with the flag names or get
rid of the SQE_INHERITED_FLAGS define. That'll make it easy to find.

>> and with that we could have things Just Work in terms of numbering, if
>> we keep the overlapped values at the start. Would need IOSQE_* to also
>> use an enum, ala:
>>
>> enum {
>> __IOSQE_FIXED_FILE,
>> __IOSQE_IO_DRAIN,
>> ...
>> };
>>
>
> I tried not to modify the userspace header. Wouldn't it be better to
> add a _BIT postfix instead of the underscores?

No strong preference; I usually do the underscores, but it's not a big deal
to me. There are also BIT_* helpers to make the masks; we should use those.
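
For reference, the BIT() helper from <linux/bits.h> is essentially just a
shift (simplified here; the real definition goes through the UL()/ULL()
wrappers):

	#define BIT(nr)		(1UL << (nr))
	#define BIT_ULL(nr)	(1ULL << (nr))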

--
Jens Axboe

2020-01-18 10:32:50

by Pavel Begunkov

Subject: [PATCH v2] io_uring: optimise sqe-to-req flags translation

For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there
is a repetitive pattern of their translation:
e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*

Use the same numerical values/bits for them, and copy them instead of
manual handling.

Signed-off-by: Pavel Begunkov <[email protected]>
---

v2: enum to generate bits (Jens Axboe)
    Comments cross 80 chars, but IMHO it's more visually appealing that way.

fs/io_uring.c | 75 +++++++++++++++++++++--------------
include/uapi/linux/io_uring.h | 26 +++++++++---
2 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ed1adeda370e..e3e2438a7480 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -452,6 +452,49 @@ struct io_async_ctx {
};
};

+enum {
+ REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
+ REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
+ REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
+ REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
+ REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
+
+ REQ_F_LINK_NEXT_BIT,
+ REQ_F_FAIL_LINK_BIT,
+ REQ_F_INFLIGHT_BIT,
+ REQ_F_CUR_POS_BIT,
+ REQ_F_NOWAIT_BIT,
+ REQ_F_IOPOLL_COMPLETED_BIT,
+ REQ_F_LINK_TIMEOUT_BIT,
+ REQ_F_TIMEOUT_BIT,
+ REQ_F_ISREG_BIT,
+ REQ_F_MUST_PUNT_BIT,
+ REQ_F_TIMEOUT_NOSEQ_BIT,
+ REQ_F_COMP_LOCKED_BIT,
+};
+
+enum {
+ /* correspond one-to-one to IOSQE_IO_* flags*/
+ REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), /* ctx owns file */
+ REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), /* drain existing IO first */
+ REQ_F_LINK = BIT(REQ_F_LINK_BIT), /* linked sqes */
+ REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), /* doesn't sever on completion < 0 */
+ REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), /* IOSQE_ASYNC */
+
+ REQ_F_LINK_NEXT = BIT(REQ_F_LINK_NEXT_BIT), /* already grabbed next link */
+ REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT), /* fail rest of links */
+ REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), /* on inflight list */
+ REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), /* read/write uses file position */
+ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), /* must not punt to workers */
+ REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT),/* polled IO has completed */
+ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), /* has linked timeout */
+ REQ_F_TIMEOUT = BIT(REQ_F_TIMEOUT_BIT), /* timeout request */
+ REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), /* regular file */
+ REQ_F_MUST_PUNT = BIT(REQ_F_MUST_PUNT_BIT), /* must be punted even for NONBLOCK */
+ REQ_F_TIMEOUT_NOSEQ = BIT(REQ_F_TIMEOUT_NOSEQ_BIT), /* no timeout sequence */
+ REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT), /* completion under lock */
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -494,23 +537,6 @@ struct io_kiocb {
struct list_head link_list;
unsigned int flags;
refcount_t refs;
-#define REQ_F_NOWAIT 1 /* must not punt to workers */
-#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
-#define REQ_F_FIXED_FILE 4 /* ctx owns file */
-#define REQ_F_LINK_NEXT 8 /* already grabbed next link */
-#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
-#define REQ_F_LINK 64 /* linked sqes */
-#define REQ_F_LINK_TIMEOUT 128 /* has linked timeout */
-#define REQ_F_FAIL_LINK 256 /* fail rest of links */
-#define REQ_F_TIMEOUT 1024 /* timeout request */
-#define REQ_F_ISREG 2048 /* regular file */
-#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
-#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
-#define REQ_F_INFLIGHT 16384 /* on inflight list */
-#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
-#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
-#define REQ_F_FORCE_ASYNC 131072 /* IOSQE_ASYNC */
-#define REQ_F_CUR_POS 262144 /* read/write uses file position */
u64 user_data;
u32 result;
u32 sequence;
@@ -4355,9 +4381,6 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
flags = READ_ONCE(sqe->flags);
fd = READ_ONCE(sqe->fd);

- if (flags & IOSQE_IO_DRAIN)
- req->flags |= REQ_F_IO_DRAIN;
-
if (!io_req_needs_file(req, fd))
return 0;

@@ -4593,8 +4616,9 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
ret = -EINVAL;
goto err_req;
}
- if (sqe_flags & IOSQE_ASYNC)
- req->flags |= REQ_F_FORCE_ASYNC;
+ /* same numerical values with corresponding REQ_F_*, safe to copy */
+ req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
+ IOSQE_ASYNC);

ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
@@ -4618,10 +4642,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
head->flags |= REQ_F_IO_DRAIN;
ctx->drain_next = 1;
}
-
- if (sqe_flags & IOSQE_IO_HARDLINK)
- req->flags |= REQ_F_HARDLINK;
-
if (io_alloc_async_ctx(req)) {
ret = -EAGAIN;
goto err_req;
@@ -4648,9 +4668,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
- if (sqe_flags & IOSQE_IO_HARDLINK)
- req->flags |= REQ_F_HARDLINK;
-
INIT_LIST_HEAD(&req->link_list);
ret = io_req_defer_prep(req, sqe);
if (ret)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 955fd477e530..cee59996b23a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -10,6 +10,7 @@

#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/bits.h>

/*
* IO submission data structure (Submission Queue Entry)
@@ -45,14 +46,29 @@ struct io_uring_sqe {
};
};

+enum {
+ IOSQE_FIXED_FILE_BIT,
+ IOSQE_IO_DRAIN_BIT,
+ IOSQE_IO_LINK_BIT,
+ IOSQE_IO_HARDLINK_BIT,
+ IOSQE_ASYNC_BIT,
+};
+
/*
* sqe->flags
*/
-#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
-#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
-#define IOSQE_IO_LINK (1U << 2) /* links next sqe */
-#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */
-#define IOSQE_ASYNC (1U << 4) /* always go async */
+enum {
+ /* use fixed fileset */
+ IOSQE_FIXED_FILE = BIT(IOSQE_FIXED_FILE_BIT),
+ /* issue after inflight IO */
+ IOSQE_IO_DRAIN = BIT(IOSQE_IO_DRAIN_BIT),
+ /* links next sqe */
+ IOSQE_IO_LINK = BIT(IOSQE_IO_LINK_BIT),
+ /* like LINK, but stronger */
+ IOSQE_IO_HARDLINK = BIT(IOSQE_IO_HARDLINK_BIT),
+ /* always go async */
+ IOSQE_ASYNC = BIT(IOSQE_ASYNC_BIT),
+};

/*
* io_uring_setup() flags
--
2.24.0

2020-01-18 16:36:51

by Jens Axboe

Subject: Re: [PATCH v2] io_uring: optimise sqe-to-req flags translation

On 1/18/20 3:24 AM, Pavel Begunkov wrote:
> For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there
> is a repetitive pattern of their translation:
> e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*
>
> Use the same numerical values/bits for them, and copy them instead of
> manual handling.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>
> v2: enum to generate bits (Jens Axboe)
> Comments cross 80 chars, but IMHO it's more visually appealing that way.
>
> fs/io_uring.c | 75 +++++++++++++++++++++--------------
> include/uapi/linux/io_uring.h | 26 +++++++++---
> 2 files changed, 67 insertions(+), 34 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index ed1adeda370e..e3e2438a7480 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -452,6 +452,49 @@ struct io_async_ctx {
> };
> };
>
> +enum {
> + REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
> + REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
> + REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
> + REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
> + REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
> +
> + REQ_F_LINK_NEXT_BIT,
> + REQ_F_FAIL_LINK_BIT,
> + REQ_F_INFLIGHT_BIT,
> + REQ_F_CUR_POS_BIT,
> + REQ_F_NOWAIT_BIT,
> + REQ_F_IOPOLL_COMPLETED_BIT,
> + REQ_F_LINK_TIMEOUT_BIT,
> + REQ_F_TIMEOUT_BIT,
> + REQ_F_ISREG_BIT,
> + REQ_F_MUST_PUNT_BIT,
> + REQ_F_TIMEOUT_NOSEQ_BIT,
> + REQ_F_COMP_LOCKED_BIT,
> +};

Perfect

> +enum {
> + /* correspond one-to-one to IOSQE_IO_* flags*/
> + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), /* ctx owns file */

I'd put the comment on top of the line instead, these lines are way too
long.

> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 955fd477e530..cee59996b23a 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -10,6 +10,7 @@
>
> #include <linux/fs.h>
> #include <linux/types.h>
> +#include <linux/bits.h>
>
> /*
> * IO submission data structure (Submission Queue Entry)
> @@ -45,14 +46,29 @@ struct io_uring_sqe {
> };
> };
>
> +enum {
> + IOSQE_FIXED_FILE_BIT,
> + IOSQE_IO_DRAIN_BIT,
> + IOSQE_IO_LINK_BIT,
> + IOSQE_IO_HARDLINK_BIT,
> + IOSQE_ASYNC_BIT,
> +};
> +
> /*
> * sqe->flags
> */
> -#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
> -#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
> -#define IOSQE_IO_LINK (1U << 2) /* links next sqe */
> -#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */
> -#define IOSQE_ASYNC (1U << 4) /* always go async */
> +enum {
> + /* use fixed fileset */
> + IOSQE_FIXED_FILE = BIT(IOSQE_FIXED_FILE_BIT),

Let's please not use BIT() for the user visible part, though. And I'd
leave these as defines, sometimes apps do things ala:

#ifndef IOSQE_IO_LINK
#define IOSQE_IO_LINK ...
#endif

to make it easier to co-exist with old and new headers.

--
Jens Axboe

2020-01-18 17:12:20

by Pavel Begunkov

Subject: Re: [PATCH v2] io_uring: optimise sqe-to-req flags translation

On 18/01/2020 19:34, Jens Axboe wrote:
>> +enum {
>> + /* correspond one-to-one to IOSQE_IO_* flags*/
>> + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), /* ctx owns file */
>
> I'd put the comment on top of the line instead, these lines are way too
> long.

I find it less readable, but let's see

>
> Let's please not use BIT() for the user visible part, though. And I'd
> leave these as defines, sometimes apps do things ala:
>
> #ifndef IOSQE_IO_LINK
> #define IOSQE_IO_LINK ...
> #endif
>
> to make it easier to co-exist with old and new headers.
>

Yeah, good point!

--
Pavel Begunkov



2020-01-18 17:24:39

by Pavel Begunkov

Subject: [PATCH v3 1/1] io_uring: optimise sqe-to-req flags translation

For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there
is a repetitive pattern of their translation:
e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*

Use same numeric values/bits for them and copy instead of manual
handling.

Signed-off-by: Pavel Begunkov <[email protected]>
---

v2: enum to generate bits (Jens Axboe)
v3: don't use BIT() for userspace headers (Jens Axboe)
leave IOSQE_* as defines for compatibility (Jens Axboe)

fs/io_uring.c | 92 ++++++++++++++++++++++++-----------
include/uapi/linux/io_uring.h | 23 +++++++--
2 files changed, 81 insertions(+), 34 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ed1adeda370e..cf5bad51f752 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -46,6 +46,7 @@
#include <linux/compat.h>
#include <linux/refcount.h>
#include <linux/uio.h>
+#include <linux/bits.h>

#include <linux/sched/signal.h>
#include <linux/fs.h>
@@ -452,6 +453,65 @@ struct io_async_ctx {
};
};

+enum {
+ REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
+ REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
+ REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
+ REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
+ REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
+
+ REQ_F_LINK_NEXT_BIT,
+ REQ_F_FAIL_LINK_BIT,
+ REQ_F_INFLIGHT_BIT,
+ REQ_F_CUR_POS_BIT,
+ REQ_F_NOWAIT_BIT,
+ REQ_F_IOPOLL_COMPLETED_BIT,
+ REQ_F_LINK_TIMEOUT_BIT,
+ REQ_F_TIMEOUT_BIT,
+ REQ_F_ISREG_BIT,
+ REQ_F_MUST_PUNT_BIT,
+ REQ_F_TIMEOUT_NOSEQ_BIT,
+ REQ_F_COMP_LOCKED_BIT,
+};
+
+enum {
+ /* ctx owns file */
+ REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT),
+ /* drain existing IO first */
+ REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT),
+ /* linked sqes */
+ REQ_F_LINK = BIT(REQ_F_LINK_BIT),
+ /* doesn't sever on completion < 0 */
+ REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
+ /* IOSQE_ASYNC */
+ REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
+
+ /* already grabbed next link */
+ REQ_F_LINK_NEXT = BIT(REQ_F_LINK_NEXT_BIT),
+ /* fail rest of links */
+ REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT),
+ /* on inflight list */
+ REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
+ /* read/write uses file position */
+ REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
+ /* must not punt to workers */
+ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
+ /* polled IO has completed */
+ REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
+ /* has linked timeout */
+ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
+ /* timeout request */
+ REQ_F_TIMEOUT = BIT(REQ_F_TIMEOUT_BIT),
+ /* regular file */
+ REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
+ /* must be punted even for NONBLOCK */
+ REQ_F_MUST_PUNT = BIT(REQ_F_MUST_PUNT_BIT),
+ /* no timeout sequence */
+ REQ_F_TIMEOUT_NOSEQ = BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
+ /* completion under lock */
+ REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -494,23 +554,6 @@ struct io_kiocb {
struct list_head link_list;
unsigned int flags;
refcount_t refs;
-#define REQ_F_NOWAIT 1 /* must not punt to workers */
-#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
-#define REQ_F_FIXED_FILE 4 /* ctx owns file */
-#define REQ_F_LINK_NEXT 8 /* already grabbed next link */
-#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
-#define REQ_F_LINK 64 /* linked sqes */
-#define REQ_F_LINK_TIMEOUT 128 /* has linked timeout */
-#define REQ_F_FAIL_LINK 256 /* fail rest of links */
-#define REQ_F_TIMEOUT 1024 /* timeout request */
-#define REQ_F_ISREG 2048 /* regular file */
-#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
-#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
-#define REQ_F_INFLIGHT 16384 /* on inflight list */
-#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
-#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
-#define REQ_F_FORCE_ASYNC 131072 /* IOSQE_ASYNC */
-#define REQ_F_CUR_POS 262144 /* read/write uses file position */
u64 user_data;
u32 result;
u32 sequence;
@@ -4355,9 +4398,6 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
flags = READ_ONCE(sqe->flags);
fd = READ_ONCE(sqe->fd);

- if (flags & IOSQE_IO_DRAIN)
- req->flags |= REQ_F_IO_DRAIN;
-
if (!io_req_needs_file(req, fd))
return 0;

@@ -4593,8 +4633,9 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
ret = -EINVAL;
goto err_req;
}
- if (sqe_flags & IOSQE_ASYNC)
- req->flags |= REQ_F_FORCE_ASYNC;
+ /* same numerical values with corresponding REQ_F_*, safe to copy */
+ req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
+ IOSQE_ASYNC);

ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
@@ -4618,10 +4659,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
head->flags |= REQ_F_IO_DRAIN;
ctx->drain_next = 1;
}
-
- if (sqe_flags & IOSQE_IO_HARDLINK)
- req->flags |= REQ_F_HARDLINK;
-
if (io_alloc_async_ctx(req)) {
ret = -EAGAIN;
goto err_req;
@@ -4648,9 +4685,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
- if (sqe_flags & IOSQE_IO_HARDLINK)
- req->flags |= REQ_F_HARDLINK;
-
INIT_LIST_HEAD(&req->link_list);
ret = io_req_defer_prep(req, sqe);
if (ret)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 955fd477e530..57d05cc5e271 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -45,14 +45,27 @@ struct io_uring_sqe {
};
};

+enum {
+ IOSQE_FIXED_FILE_BIT,
+ IOSQE_IO_DRAIN_BIT,
+ IOSQE_IO_LINK_BIT,
+ IOSQE_IO_HARDLINK_BIT,
+ IOSQE_ASYNC_BIT,
+};
+
/*
* sqe->flags
*/
-#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
-#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
-#define IOSQE_IO_LINK (1U << 2) /* links next sqe */
-#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */
-#define IOSQE_ASYNC (1U << 4) /* always go async */
+/* use fixed fileset */
+#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
+/* issue after inflight IO */
+#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
+/* links next sqe */
+#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
+/* like LINK, but stronger */
+#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
+/* always go async */
+#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)

/*
* io_uring_setup() flags
--
2.24.0

2020-01-18 20:46:21

by Jens Axboe

Subject: Re: [PATCH v2] io_uring: optimise sqe-to-req flags translation

On 1/18/20 10:10 AM, Pavel Begunkov wrote:
> On 18/01/2020 19:34, Jens Axboe wrote:
>>> +enum {
>>> + /* correspond one-to-one to IOSQE_IO_* flags*/
>>> + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), /* ctx owns file */
>>
>> I'd put the comment on top of the line instead, these lines are way too
>> long.
>
> I find it less readable, but let's see

Well, the monster lines are unreadable on an 80-char display... I'm generally
not a stickler with 80 chars; if going a few over means we don't need to break
a line, I'm all for it. But the long comments are unreadable for me.

--
Jens Axboe

2020-01-18 20:48:28

by Jens Axboe

Subject: Re: [PATCH v3 1/1] io_uring: optimise sqe-to-req flags translation

On 1/18/20 10:22 AM, Pavel Begunkov wrote:
> For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there
> is a repetitive pattern of their translation:
> e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*
>
> Use same numeric values/bits for them and copy instead of manual
> handling.

Thanks, applied.

--
Jens Axboe

2020-01-19 07:49:08

by Pavel Begunkov

Subject: Re: [PATCH v3 1/1] io_uring: optimise sqe-to-req flags translation

On 18/01/2020 23:46, Jens Axboe wrote:
> On 1/18/20 10:22 AM, Pavel Begunkov wrote:
>> For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there
>> is a repetitive pattern of their translation:
>> e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*
>>
>> Use same numeric values/bits for them and copy instead of manual
>> handling.

I wonder why this isn't common practice around the kernel. E.g. I'm looking
at iocb_flags() and kiocb_set_rw_flags(), and their one-by-one flag copying
is just wasteful.
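
For context, kiocb_set_rw_flags() in <linux/fs.h> at the time looked roughly
like the sketch below (paraphrased, not the exact source): each RWF_* flag
gets its own branch to set the matching IOCB_* bit.

	static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
	{
		if (unlikely(flags & ~RWF_SUPPORTED))
			return -EOPNOTSUPP;

		if (flags & RWF_NOWAIT) {
			if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
				return -EOPNOTSUPP;
			ki->ki_flags |= IOCB_NOWAIT;
		}
		if (flags & RWF_HIPRI)
			ki->ki_flags |= IOCB_HIPRI;
		if (flags & RWF_DSYNC)
			ki->ki_flags |= IOCB_DSYNC;
		if (flags & RWF_SYNC)
			ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
		if (flags & RWF_APPEND)
			ki->ki_flags |= IOCB_APPEND;
		return 0;
	}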

>
> Thanks, applied.
>

--
Pavel Begunkov



2020-01-19 17:38:03

by Jens Axboe

Subject: Re: [PATCH v3 1/1] io_uring: optimise sqe-to-req flags translation

On 1/19/20 12:46 AM, Pavel Begunkov wrote:
> On 18/01/2020 23:46, Jens Axboe wrote:
>> On 1/18/20 10:22 AM, Pavel Begunkov wrote:
>>> For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there
>>> is a repetitive pattern of their translation:
>>> e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG*
>>>
>>> Use same numeric values/bits for them and copy instead of manual
>>> handling.
>
> I wonder why this isn't common practice around the kernel. E.g. I'm
> looking at iocb_flags() and kiocb_set_rw_flags(), and their one-by-one
> flag copying is just wasteful.

If I were to guess, I'd assume that it's due to continually adding flags
one at a time. For the first flag, it's not a big deal. If you end up
with a handful or more, it's clearly much better to have them occupy the
same space and avoid lots of branches checking and setting matching
flags.

You should send in patches for IOCB flags.

--
Jens Axboe