From: Adam Manzanares <[email protected]>
This is the per-I/O equivalent of the ioprio_set system call.
See the following link for performance implications on a SATA HDD:
https://lkml.org/lkml/2016/12/6/495
First patch factors ioprio_check_cap function out of ioprio_set system call to
also be used by the aio ioprio interface.
Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
Third patch passes ioprio hint from aio iocb to kiocb and initializes kiocb
ioprio value appropriately when it is not explicitly set.
Fourth patch enables the feature for blkdev.
Fifth patch enables the feature for iomap direct IO.
Note: this work is based on top of linux-vfs/for-next
v2: merge patches
use IOCB_FLAG_IOPRIO
validate intended use with IOCB_IOPRIO
add linux-api and linux-block to cc
v3: add ioprio_check_cap function
convert kiocb ki_hint to u16
use ioprio_check_cap when adding ioprio to kiocb in aio.c
v4: handle IOCB_IOPRIO in aio_prep_rw
note patch 3 depends on patch 1 in commit msg
v5: rename ki_hint_valid -> ki_hint_validate
remove ki_hint_validate comment and whitespace
remove IOCB_IOPRIO flag
initialize kiocb to have no priority
v6: add __blkdev_direct_IO_simple ioprio support
v7: Tie ki_hint_validate to kiocb ki_hint type
Add additional ki_hint_validate check
Adam Manzanares (5):
block: add ioprio_check_cap function
fs: Convert kiocb rw_hint from enum to u16
fs: Add aio iopriority support
fs: blkdev set bio prio from kiocb prio
fs: iomap dio set bio prio from kiocb prio
block/ioprio.c | 22 ++++++++++++++++------
drivers/block/loop.c | 3 +++
fs/aio.c | 18 +++++++++++++++++-
fs/block_dev.c | 2 ++
fs/iomap.c | 1 +
include/linux/fs.h | 16 ++++++++++++++--
include/linux/ioprio.h | 2 ++
include/uapi/linux/aio_abi.h | 1 +
8 files changed, 56 insertions(+), 9 deletions(-)
--
2.15.1
From: Adam Manzanares <[email protected]>
Now that kiocb has an ioprio field, copy this over to the bio when it is
created from the kiocb.
Signed-off-by: Adam Manzanares <[email protected]>
Reviewed-by: Jeff Moyer <[email protected]>
---
fs/block_dev.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7ec920e27065..11ba99e79d2a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -216,6 +216,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
+ bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
@@ -355,6 +356,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
+ bio->bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
--
2.15.1
From: Adam Manzanares <[email protected]>
This is the per-I/O equivalent of the ioprio_set system call.
When IOCB_FLAG_IOPRIO is set on the iocb aio_flags field, then we set the
newly added kiocb ki_ioprio field to the value in the iocb aio_reqprio field.
This patch depends on block: add ioprio_check_cap function.
Signed-off-by: Adam Manzanares <[email protected]>
Reviewed-by: Jeff Moyer <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
drivers/block/loop.c | 3 +++
fs/aio.c | 16 ++++++++++++++++
include/linux/fs.h | 3 +++
include/uapi/linux/aio_abi.h | 1 +
4 files changed, 23 insertions(+)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 5d4e31655d96..dd98dfd97f5e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -76,6 +76,8 @@
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/uio.h>
+#include <linux/ioprio.h>
+
#include "loop.h"
#include <linux/uaccess.h>
@@ -559,6 +561,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_filp = file;
cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT;
+ cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
if (cmd->css)
kthread_associate_blkcg(cmd->css);
diff --git a/fs/aio.c b/fs/aio.c
index 87d8939bb1e4..76a9d4c14e55 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1451,6 +1451,22 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
+ if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
+ /*
+ * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
+ * aio_reqprio is interpreted as an I/O scheduling
+ * class and priority.
+ */
+ ret = ioprio_check_cap(iocb->aio_reqprio);
+ if (ret) {
+ pr_debug("aio ioprio check cap error\n");
+ return -EINVAL;
+ }
+
+ req->ki_ioprio = iocb->aio_reqprio;
+ } else
+ req->ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
fput(req->ki_filp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9b76ee73af14..0c61d5987879 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -36,6 +36,7 @@
#include <linux/delayed_call.h>
#include <linux/uuid.h>
#include <linux/errseq.h>
+#include <linux/ioprio.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -300,6 +301,7 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_hint;
+ u16 ki_ioprio; /* See linux/ioprio.h */
} __randomize_layout;
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -1942,6 +1944,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
.ki_filp = filp,
.ki_flags = iocb_flags(filp),
.ki_hint = ki_hint_validate(file_write_hint(filp)),
+ .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0),
};
}
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index 2c0a3415beee..d4e768d55d14 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -55,6 +55,7 @@ enum {
* is valid.
*/
#define IOCB_FLAG_RESFD (1 << 0)
+#define IOCB_FLAG_IOPRIO (1 << 1)
/* read() from /dev/aio returns these structures. */
struct io_event {
--
2.15.1
From: Adam Manzanares <[email protected]>
Aio per command iopriority support introduces a second interface between
userland and the kernel capable of passing iopriority. The aio interface also
needs the ability to verify that the submitting context has sufficient
privileges to submit IOPRIO_CLASS_RT commands. This patch creates the
ioprio_check_cap function to be used by the ioprio_set system call and also by
the aio interface.
Signed-off-by: Adam Manzanares <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Jeff Moyer <[email protected]>
---
block/ioprio.c | 22 ++++++++++++++++------
include/linux/ioprio.h | 2 ++
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/block/ioprio.c b/block/ioprio.c
index 6f5d0b6625e3..f9821080c92c 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -61,15 +61,10 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
}
EXPORT_SYMBOL_GPL(set_task_ioprio);
-SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
+int ioprio_check_cap(int ioprio)
{
int class = IOPRIO_PRIO_CLASS(ioprio);
int data = IOPRIO_PRIO_DATA(ioprio);
- struct task_struct *p, *g;
- struct user_struct *user;
- struct pid *pgrp;
- kuid_t uid;
- int ret;
switch (class) {
case IOPRIO_CLASS_RT:
@@ -92,6 +87,21 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
return -EINVAL;
}
+ return 0;
+}
+
+SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
+{
+ struct task_struct *p, *g;
+ struct user_struct *user;
+ struct pid *pgrp;
+ kuid_t uid;
+ int ret;
+
+ ret = ioprio_check_cap(ioprio);
+ if (ret)
+ return ret;
+
ret = -ESRCH;
rcu_read_lock();
switch (which) {
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 627efac73e6d..4a28cec49ec3 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -77,4 +77,6 @@ extern int ioprio_best(unsigned short aprio, unsigned short bprio);
extern int set_task_ioprio(struct task_struct *task, int ioprio);
+extern int ioprio_check_cap(int ioprio);
+
#endif
--
2.15.1
From: Adam Manzanares <[email protected]>
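In order to avoid kiocb bloat for per-command iopriority support, ki_hint
is converted from enum rw_hint to a u16. A guard, ki_hint_validate(), is added around the ki_hint assignments so that a hint that does not fit in the u16 field is stored as 0.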
Signed-off-by: Adam Manzanares <[email protected]>
---
fs/aio.c | 2 +-
include/linux/fs.h | 13 +++++++++++--
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 755d3f57bcc8..87d8939bb1e4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1450,7 +1450,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
req->ki_flags = iocb_flags(req->ki_filp);
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
- req->ki_hint = file_write_hint(req->ki_filp);
+ req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
fput(req->ki_filp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7f07977bdfd7..9b76ee73af14 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -299,7 +299,7 @@ struct kiocb {
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
- enum rw_hint ki_hint;
+ u16 ki_hint;
} __randomize_layout;
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -1927,12 +1927,21 @@ static inline enum rw_hint file_write_hint(struct file *file)
static inline int iocb_flags(struct file *file);
+static inline u16 ki_hint_validate(enum rw_hint hint)
+{
+ typeof(((struct kiocb *)0)->ki_hint) max_hint = -1;
+
+ if (hint <= max_hint)
+ return hint;
+ return 0;
+}
+
static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
{
*kiocb = (struct kiocb) {
.ki_filp = filp,
.ki_flags = iocb_flags(filp),
- .ki_hint = file_write_hint(filp),
+ .ki_hint = ki_hint_validate(file_write_hint(filp)),
};
}
--
2.15.1
On 5/22/18 11:52 AM, [email protected] wrote:
> From: Adam Manzanares <[email protected]>
>
> This is the per-I/O equivalent of the ioprio_set system call.
> See the following link for performance implications on a SATA HDD:
> https://lkml.org/lkml/2016/12/6/495
>
> First patch factors ioprio_check_cap function out of ioprio_set system call to
> also be used by the aio ioprio interface.
>
> Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
>
> Third patch passes ioprio hint from aio iocb to kiocb and initializes kiocb
> ioprio value appropriately when it is not explicitly set.
>
> Fourth patch enables the feature for blkdev.
>
> Fifth patch enables the feature for iomap direct IO
LGTM, you can add:
Reviewed-by: Jens Axboe <[email protected]>
Al, are you picking this series up, or should I?
--
Jens Axboe
From: Adam Manzanares <[email protected]>
Now that kiocb has an ioprio field, copy this over to the bio when it is
created from the kiocb during direct IO.
Signed-off-by: Adam Manzanares <[email protected]>
Reviewed-by: Jeff Moyer <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/iomap.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/iomap.c b/fs/iomap.c
index afd163586aa0..65aae194aeca 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -919,6 +919,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
bio->bi_iter.bi_sector =
(iomap->addr + pos - iomap->offset) >> 9;
bio->bi_write_hint = dio->iocb->ki_hint;
+ bio->bi_ioprio = dio->iocb->ki_ioprio;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
--
2.15.1
On Tue, May 22, 2018 at 11:55:04AM -0600, Jens Axboe wrote:
> On 5/22/18 11:52 AM, [email protected] wrote:
> > From: Adam Manzanares <[email protected]>
> >
> > This is the per-I/O equivalent of the ioprio_set system call.
> > See the following link for performance implications on a SATA HDD:
> > https://lkml.org/lkml/2016/12/6/495
> >
> > First patch factors ioprio_check_cap function out of ioprio_set system call to
> > also be used by the aio ioprio interface.
> >
> > Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
> >
> > Third patch passes ioprio hint from aio iocb to kiocb and initializes kiocb
> > ioprio value appropriately when it is not explicitly set.
> >
> > Fourth patch enables the feature for blkdev.
> >
> > Fifth patch enables the feature for iomap direct IO
>
> LGTM, you can add:
>
> Reviewed-by: Jens Axboe <[email protected]>
>
> Al, are you picking this series up, or should I?
Probably better if I do, once I finish reviewing Christoph's patchset -
we already have a bunch of stuff around fs/aio.c in this cycle...
On 5/22/18 12:30 PM, Al Viro wrote:
> On Tue, May 22, 2018 at 11:55:04AM -0600, Jens Axboe wrote:
>> On 5/22/18 11:52 AM, [email protected] wrote:
>>> From: Adam Manzanares <[email protected]>
>>>
>>> This is the per-I/O equivalent of the ioprio_set system call.
>>> See the following link for performance implications on a SATA HDD:
>>> https://lkml.org/lkml/2016/12/6/495
>>>
>>> First patch factors ioprio_check_cap function out of ioprio_set system call to
>>> also be used by the aio ioprio interface.
>>>
>>> Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
>>>
>>> Third patch passes ioprio hint from aio iocb to kiocb and initializes kiocb
>>> ioprio value appropriately when it is not explicitly set.
>>>
>>> Fourth patch enables the feature for blkdev.
>>>
>>> Fifth patch enables the feature for iomap direct IO
>>
>> LGTM, you can add:
>>
>> Reviewed-by: Jens Axboe <[email protected]>
>>
>> Al, are you picking this series up, or should I?
>
> Probably better if I do, once I finish reviewing Christoph's patchset -
> we already have a bunch of stuff around fs/aio.c in this cycle...
Alright, sounds good, thanks Al.
--
Jens Axboe
On 5/22/18 11:30 AM, Jens Axboe wrote:
> On 5/22/18 12:30 PM, Al Viro wrote:
>> On Tue, May 22, 2018 at 11:55:04AM -0600, Jens Axboe wrote:
>>> On 5/22/18 11:52 AM, [email protected] wrote:
>>>> From: Adam Manzanares <[email protected]>
>>>>
>>>> This is the per-I/O equivalent of the ioprio_set system call.
>>>> See the following link for performance implications on a SATA HDD:
>>>> https://lkml.org/lkml/2016/12/6/495
>>>>
>>>> First patch factors ioprio_check_cap function out of ioprio_set system call to
>>>> also be used by the aio ioprio interface.
>>>>
>>>> Second patch converts kiocb ki_hint field to a u16 to avoid kiocb bloat.
>>>>
>>>> Third patch passes ioprio hint from aio iocb to kiocb and initializes kiocb
>>>> ioprio value appropriately when it is not explicitly set.
>>>>
>>>> Fourth patch enables the feature for blkdev.
>>>>
>>>> Fifth patch enables the feature for iomap direct IO
>>>
>>> LGTM, you can add:
>>>
>>> Reviewed-by: Jens Axboe <[email protected]>
>>>
>>> Al, are you picking this series up, or should I?
>>
>> Probably better if I do, once I finish reviewing Christoph's patchset -
>> we already have a bunch of stuff around fs/aio.c in this cycle...
>
> Alright, sounds good, thanks Al.
>
I was working on the man page update for this feature and noticed I
could be a bit more informative on error if I return the error value
returned by ioprio_check_cap in fs/aio.c, rather than always returning -EINVAL.
Should I resend the whole series?