From: Adam Manzanares <[email protected]>
This is the per-I/O equivalent of the ioprio_set system call.
See the following link for performance implications on a SATA HDD:
https://lkml.org/lkml/2016/12/6/495
First patch factors ioprio_check_cap function out of ioprio_set system call to
also be used by the aio ioprio interface.
Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
Third patch passes ioprio hint from aio iocb to kiocb and enables block_dev
usage of the per I/O ioprio feature.
v2: merge patches
use IOCB_FLAG_IOPRIO
validate intended use with IOCB_IOPRIO
add linux-api and linux-block to cc
v3: add ioprio_check_cap function
convert kiocb ki_hint to u16
use ioprio_check_cap when adding ioprio to kiocb in aio.c
v4: handle IOCB_IOPRIO in aio_prep_rw
note patch 3 depends on patch 1 in commit msg
Adam Manzanares (3):
block: add ioprio_check_cap function
fs: Convert kiocb rw_hint from enum to u16
fs: Add aio iopriority support for block_dev
block/ioprio.c | 22 ++++++++++++++++------
fs/aio.c | 16 ++++++++++++++++
fs/block_dev.c | 2 ++
include/linux/fs.h | 17 +++++++++++++++--
include/linux/ioprio.h | 2 ++
include/uapi/linux/aio_abi.h | 1 +
6 files changed, 52 insertions(+), 8 deletions(-)
--
2.15.1
From: Adam Manzanares <[email protected]>
In order to avoid kiocb bloat for per command iopriority support, rw_hint
is converted from enum to a u16. Added a guard around ki_hint assigment.
Signed-off-by: Adam Manzanares <[email protected]>
---
include/linux/fs.h | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 760d8da1b6c7..7a90ce387e00 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -284,6 +284,8 @@ enum rw_hint {
WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
};
+#define MAX_KI_HINT ((1 << 16) - 1) /* ki_hint type is u16 */
+
#define IOCB_EVENTFD (1 << 0)
#define IOCB_APPEND (1 << 1)
#define IOCB_DIRECT (1 << 2)
@@ -299,7 +301,7 @@ struct kiocb {
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
- enum rw_hint ki_hint;
+ u16 ki_hint;
} __randomize_layout;
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -1927,12 +1929,21 @@ static inline enum rw_hint file_write_hint(struct file *file)
static inline int iocb_flags(struct file *file);
+/* ki_hint changed from enum to u16, make sure rw_hint fits into u16 */
+static inline u16 ki_hint_valid(enum rw_hint hint)
+{
+ if (hint > MAX_KI_HINT)
+ return 0;
+
+ return hint;
+}
+
static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
{
*kiocb = (struct kiocb) {
.ki_filp = filp,
.ki_flags = iocb_flags(filp),
- .ki_hint = file_write_hint(filp),
+ .ki_hint = ki_hint_valid(file_write_hint(filp)),
};
}
--
2.15.1
From: Adam Manzanares <[email protected]>
This is the per-I/O equivalent of the ioprio_set system call.
When IOCB_FLAG_IOPRIO is set on the iocb aio_flags field, then we set the
newly added kiocb ki_ioprio field to the value in the iocb aio_reqprio field.
When a bio is created for an aio request by the block dev we set the priority
value of the bio to the user supplied value.
This patch depends on block: add ioprio_check_cap function
Signed-off-by: Adam Manzanares <[email protected]>
---
fs/aio.c | 16 ++++++++++++++++
fs/block_dev.c | 2 ++
include/linux/fs.h | 2 ++
include/uapi/linux/aio_abi.h | 1 +
4 files changed, 21 insertions(+)
diff --git a/fs/aio.c b/fs/aio.c
index f3eae5d5771b..ff3107aa82d5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1451,6 +1451,22 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
req->ki_hint = file_write_hint(req->ki_filp);
+ if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
+ /*
+ * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
+ * aio_reqprio is interpreted as an I/O scheduling
+ * class and priority.
+ */
+ ret = ioprio_check_cap(iocb->aio_reqprio);
+ if (ret) {
+ pr_debug("aio ioprio check cap error\n");
+ return -EINVAL;
+ }
+
+ req->ki_ioprio = iocb->aio_reqprio;
+ req->ki_flags |= IOCB_IOPRIO;
+ }
+
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
fput(req->ki_filp);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7ec920e27065..970bef79caa6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -355,6 +355,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
+ if (iocb->ki_flags & IOCB_IOPRIO)
+ bio->bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7a90ce387e00..3415e83f6350 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -294,6 +294,7 @@ enum rw_hint {
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+#define IOCB_IOPRIO (1 << 8)
struct kiocb {
struct file *ki_filp;
@@ -302,6 +303,7 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_hint;
+ u16 ki_ioprio; /* See linux/ioprio.h */
} __randomize_layout;
static inline bool is_sync_kiocb(struct kiocb *kiocb)
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index 2c0a3415beee..d4e768d55d14 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -55,6 +55,7 @@ enum {
* is valid.
*/
#define IOCB_FLAG_RESFD (1 << 0)
+#define IOCB_FLAG_IOPRIO (1 << 1)
/* read() from /dev/aio returns these structures. */
struct io_event {
--
2.15.1
From: Adam Manzanares <[email protected]>
Aio per command iopriority support introduces a second interface between
userland and the kernel capable of passing iopriority. The aio interface also
needs the ability to verify that the submitting context has sufficient
priviledges to submit IOPRIO_RT commands. This patch creates the
ioprio_check_cap function to be used by the ioprio_set system call and also by
the aio interface.
Signed-off-by: Adam Manzanares <[email protected]>
---
block/ioprio.c | 22 ++++++++++++++++------
include/linux/ioprio.h | 2 ++
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/ioprio.c b/block/ioprio.c
index 6f5d0b6625e3..f9821080c92c 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -61,15 +61,10 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
}
EXPORT_SYMBOL_GPL(set_task_ioprio);
-SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
+int ioprio_check_cap(int ioprio)
{
int class = IOPRIO_PRIO_CLASS(ioprio);
int data = IOPRIO_PRIO_DATA(ioprio);
- struct task_struct *p, *g;
- struct user_struct *user;
- struct pid *pgrp;
- kuid_t uid;
- int ret;
switch (class) {
case IOPRIO_CLASS_RT:
@@ -92,6 +87,21 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
return -EINVAL;
}
+ return 0;
+}
+
+SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
+{
+ struct task_struct *p, *g;
+ struct user_struct *user;
+ struct pid *pgrp;
+ kuid_t uid;
+ int ret;
+
+ ret = ioprio_check_cap(ioprio);
+ if (ret)
+ return ret;
+
ret = -ESRCH;
rcu_read_lock();
switch (which) {
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 627efac73e6d..4a28cec49ec3 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -77,4 +77,6 @@ extern int ioprio_best(unsigned short aprio, unsigned short bprio);
extern int set_task_ioprio(struct task_struct *task, int ioprio);
+extern int ioprio_check_cap(int ioprio);
+
#endif
--
2.15.1
On 5/17/18 2:38 PM, [email protected] wrote:
> From: Adam Manzanares <[email protected]>
>
> This is the per-I/O equivalent of the ioprio_set system call.
> See the following link for performance implications on a SATA HDD:
> https://lkml.org/lkml/2016/12/6/495
>
> First patch factors ioprio_check_cap function out of ioprio_set system call to
> also be used by the aio ioprio interface.
>
> Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
>
> Third patch passes ioprio hint from aio iocb to kiocb and enables block_dev
> usage of the per I/O ioprio feature.
>
> v2: merge patches
> use IOCB_FLAG_IOPRIO
> validate intended use with IOCB_IOPRIO
> add linux-api and linux-block to cc
>
> v3: add ioprio_check_cap function
> convert kiocb ki_hint to u16
> use ioprio_check_cap when adding ioprio to kiocb in aio.c
>
> v4: handle IOCB_IOPRIO in aio_prep_rw
> note patch 3 depends on patch 1 in commit msg
>
> Adam Manzanares (3):
> block: add ioprio_check_cap function
> fs: Convert kiocb rw_hint from enum to u16
> fs: Add aio iopriority support for block_dev
>
> block/ioprio.c | 22 ++++++++++++++++------
> fs/aio.c | 16 ++++++++++++++++
> fs/block_dev.c | 2 ++
> include/linux/fs.h | 17 +++++++++++++++--
> include/linux/ioprio.h | 2 ++
> include/uapi/linux/aio_abi.h | 1 +
> 6 files changed, 52 insertions(+), 8 deletions(-)
This looks fine to me now. I can pick up #1 for 4.18 - and 2+3 as well,
unless someone else wants to take them.
--
Jens Axboe
On 5/17/18 7:41 PM, Jens Axboe wrote:
> On 5/17/18 2:38 PM, [email protected] wrote:
>> From: Adam Manzanares <[email protected]>
>>
>> This is the per-I/O equivalent of the ioprio_set system call.
>> See the following link for performance implications on a SATA HDD:
>> https://lkml.org/lkml/2016/12/6/495
>>
>> First patch factors ioprio_check_cap function out of ioprio_set system call to
>> also be used by the aio ioprio interface.
>>
>> Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
>>
>> Third patch passes ioprio hint from aio iocb to kiocb and enables block_dev
>> usage of the per I/O ioprio feature.
>>
>> v2: merge patches
>> use IOCB_FLAG_IOPRIO
>> validate intended use with IOCB_IOPRIO
>> add linux-api and linux-block to cc
>>
>> v3: add ioprio_check_cap function
>> convert kiocb ki_hint to u16
>> use ioprio_check_cap when adding ioprio to kiocb in aio.c
>>
>> v4: handle IOCB_IOPRIO in aio_prep_rw
>> note patch 3 depends on patch 1 in commit msg
>>
>> Adam Manzanares (3):
>> block: add ioprio_check_cap function
>> fs: Convert kiocb rw_hint from enum to u16
>> fs: Add aio iopriority support for block_dev
>>
>> block/ioprio.c | 22 ++++++++++++++++------
>> fs/aio.c | 16 ++++++++++++++++
>> fs/block_dev.c | 2 ++
>> include/linux/fs.h | 17 +++++++++++++++--
>> include/linux/ioprio.h | 2 ++
>> include/uapi/linux/aio_abi.h | 1 +
>> 6 files changed, 52 insertions(+), 8 deletions(-)
>
> This looks fine to me now. I can pick up #1 for 4.18 - and 2+3 as well,
> unless someone else wants to take them.
Great, thanks Jens.
>
On 5/17/18 2:38 PM, [email protected] wrote:
> From: Adam Manzanares <[email protected]>
>
> This is the per-I/O equivalent of the ioprio_set system call.
>
> When IOCB_FLAG_IOPRIO is set on the iocb aio_flags field, then we set the
> newly added kiocb ki_ioprio field to the value in the iocb aio_reqprio field.
>
> When a bio is created for an aio request by the block dev we set the priority
> value of the bio to the user supplied value.
>
> This patch depends on block: add ioprio_check_cap function
Actually, one comment on this one:
> diff --git a/fs/aio.c b/fs/aio.c
> index f3eae5d5771b..ff3107aa82d5 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1451,6 +1451,22 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
> if (iocb->aio_flags & IOCB_FLAG_RESFD)
> req->ki_flags |= IOCB_EVENTFD;
> req->ki_hint = file_write_hint(req->ki_filp);
> + if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
> + /*
> + * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
> + * aio_reqprio is interpreted as an I/O scheduling
> + * class and priority.
> + */
> + ret = ioprio_check_cap(iocb->aio_reqprio);
> + if (ret) {
> + pr_debug("aio ioprio check cap error\n");
> + return -EINVAL;
> + }
> +
> + req->ki_ioprio = iocb->aio_reqprio;
> + req->ki_flags |= IOCB_IOPRIO;
> + }
Do we really need IOCB_IOPRIO? All zeroes is no priority set anyway,
so we should be able to get by with just setting ->ki_ioprio to either
the priority, or 0.
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 7ec920e27065..970bef79caa6 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -355,6 +355,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
> bio->bi_write_hint = iocb->ki_hint;
> bio->bi_private = dio;
> bio->bi_end_io = blkdev_bio_end_io;
> + if (iocb->ki_flags & IOCB_IOPRIO)
> + bio->bi_ioprio = iocb->ki_ioprio;
And then this assignment can just happen unconditionally.
--
Jens Axboe
On 5/18/18 8:14 AM, Jens Axboe wrote:
> On 5/17/18 2:38 PM, [email protected] wrote:
>> From: Adam Manzanares <[email protected]>
>>
>> This is the per-I/O equivalent of the ioprio_set system call.
>>
>> When IOCB_FLAG_IOPRIO is set on the iocb aio_flags field, then we set the
>> newly added kiocb ki_ioprio field to the value in the iocb aio_reqprio field.
>>
>> When a bio is created for an aio request by the block dev we set the priority
>> value of the bio to the user supplied value.
>>
>> This patch depends on block: add ioprio_check_cap function
>
> Actually, one comment on this one:
>
>> diff --git a/fs/aio.c b/fs/aio.c
>> index f3eae5d5771b..ff3107aa82d5 100644
>> --- a/fs/aio.c
>> +++ b/fs/aio.c
>> @@ -1451,6 +1451,22 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
>> if (iocb->aio_flags & IOCB_FLAG_RESFD)
>> req->ki_flags |= IOCB_EVENTFD;
>> req->ki_hint = file_write_hint(req->ki_filp);
>> + if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
>> + /*
>> + * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
>> + * aio_reqprio is interpreted as an I/O scheduling
>> + * class and priority.
>> + */
>> + ret = ioprio_check_cap(iocb->aio_reqprio);
>> + if (ret) {
>> + pr_debug("aio ioprio check cap error\n");
>> + return -EINVAL;
>> + }
>> +
>> + req->ki_ioprio = iocb->aio_reqprio;
>> + req->ki_flags |= IOCB_IOPRIO;
>> + }
>
> Do we really need IOCB_IOPRIO? All zeroes is no priority set anyway,
> so we should be able to get by with just setting ->ki_ioprio to either
> the priority, or 0.
>
>> diff --git a/fs/block_dev.c b/fs/block_dev.c
>> index 7ec920e27065..970bef79caa6 100644
>> --- a/fs/block_dev.c
>> +++ b/fs/block_dev.c
>> @@ -355,6 +355,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
>> bio->bi_write_hint = iocb->ki_hint;
>> bio->bi_private = dio;
>> bio->bi_end_io = blkdev_bio_end_io;
>> + if (iocb->ki_flags & IOCB_IOPRIO)
>> + bio->bi_ioprio = iocb->ki_ioprio;
>
> And then this assignment can just happen unconditionally.
That is a cleaner way of guaranteeing the ioprio set on the kiocb is
only set when the user intends to use the ioprio from the iocb.
I'll resend the series.
>
On Thu, May 17, 2018 at 01:38:01PM -0700, [email protected] wrote:
> From: Adam Manzanares <[email protected]>
>
> Aio per command iopriority support introduces a second interface between
> userland and the kernel capable of passing iopriority. The aio interface also
> needs the ability to verify that the submitting context has sufficient
> priviledges to submit IOPRIO_RT commands. This patch creates the
> ioprio_check_cap function to be used by the ioprio_set system call and also by
> the aio interface.
>
> Signed-off-by: Adam Manzanares <[email protected]>
Looks fine,
Reviewed-by: Christoph Hellwig <[email protected]>
> +/* ki_hint changed from enum to u16, make sure rw_hint fits into u16 */
I don't think this comment is very useful.
> +static inline u16 ki_hint_valid(enum rw_hint hint)
I'd call this ki_hint_validate.
> +{
> + if (hint > MAX_KI_HINT)
> + return 0;
> +
> + return hint;
Nit: kill the empty line.
Looks fine, although I'd split it into a aio and block_dev patch.
Also please wire this up for the fs/iomap.c direct I/O code, it should
be essentially the same sniplet as in the block_dev.c code.
On 5/18/18 9:05 AM, Christoph Hellwig wrote:
>> +/* ki_hint changed from enum to u16, make sure rw_hint fits into u16 */
>
> I don't think this comment is very useful.
>
>> +static inline u16 ki_hint_valid(enum rw_hint hint)
>
> I'd call this ki_hint_validate.
>
>> +{
>> + if (hint > MAX_KI_HINT)
>> + return 0;
>> +
>> + return hint;
>
> Nit: kill the empty line.
>
I'll clean this up in the next revision.
On 5/18/18 9:06 AM, Christoph Hellwig wrote:
> Looks fine, although I'd split it into a aio and block_dev patch.
>
> Also please wire this up for the fs/iomap.c direct I/O code, it should
> be essentially the same sniplet as in the block_dev.c code.
>
Will do.
Hi Adam,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on next-20180516]
[cannot apply to linus/master block/for-next v4.17-rc5 v4.17-rc4 v4.17-rc3 v4.17-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/adam-manzanares-wdc-com/AIO-add-per-command-iopriority/20180519-031848
config: x86_64-randconfig-x013-201819 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
fs/aio.c: In function 'aio_prep_rw':
>> fs/aio.c:1460:9: error: implicit declaration of function 'ioprio_check_cap'; did you mean 'param_check_charp'? [-Werror=implicit-function-declaration]
ret = ioprio_check_cap(iocb->aio_reqprio);
^~~~~~~~~~~~~~~~
param_check_charp
cc1: some warnings being treated as errors
vim +1460 fs/aio.c
1440
1441 static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
1442 {
1443 int ret;
1444
1445 req->ki_filp = fget(iocb->aio_fildes);
1446 if (unlikely(!req->ki_filp))
1447 return -EBADF;
1448 req->ki_complete = aio_complete_rw;
1449 req->ki_pos = iocb->aio_offset;
1450 req->ki_flags = iocb_flags(req->ki_filp);
1451 if (iocb->aio_flags & IOCB_FLAG_RESFD)
1452 req->ki_flags |= IOCB_EVENTFD;
1453 req->ki_hint = file_write_hint(req->ki_filp);
1454 if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
1455 /*
1456 * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
1457 * aio_reqprio is interpreted as an I/O scheduling
1458 * class and priority.
1459 */
> 1460 ret = ioprio_check_cap(iocb->aio_reqprio);
1461 if (ret) {
1462 pr_debug("aio ioprio check cap error\n");
1463 return -EINVAL;
1464 }
1465
1466 req->ki_ioprio = iocb->aio_reqprio;
1467 req->ki_flags |= IOCB_IOPRIO;
1468 }
1469
1470 ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
1471 if (unlikely(ret))
1472 fput(req->ki_filp);
1473 return ret;
1474 }
1475
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation