Implement a wrapper for aio_read()/write() to allow async IO on files
not implementing the iter version of read/write, such as sysfs. This
mimics how readv/writev uses non-iter ops in do_loop_readv_writev().
Signed-off-by: Bjorn Andersson <[email protected]>
---
fs/aio.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index f9f441b59966..0137a1a9bef1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1514,12 +1514,44 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
}
}
+static ssize_t aio_iter_readv_writev(struct file *file, struct kiocb *req,
+ struct iov_iter *iter, int type)
+{
+ ssize_t ret = 0;
+ ssize_t nr;
+
+ while (iov_iter_count(iter)) {
+ struct iovec iovec = iov_iter_iovec(iter);
+
+ if (type == READ) {
+ nr = file->f_op->read(file, iovec.iov_base,
+ iovec.iov_len, &req->ki_pos);
+ } else {
+ nr = file->f_op->write(file, iovec.iov_base,
+ iovec.iov_len, &req->ki_pos);
+ }
+
+ if (nr < 0) {
+ ret = nr;
+ break;
+ }
+
+ ret += nr;
+ if (nr != iovec.iov_len)
+ break;
+ iov_iter_advance(iter, nr);
+ }
+
+ return ret;
+}
+
static int aio_read(struct kiocb *req, const struct iocb *iocb,
bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
struct file *file;
+ ssize_t count;
int ret;
ret = aio_prep_rw(req, iocb);
@@ -1529,15 +1561,18 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
if (unlikely(!(file->f_mode & FMODE_READ)))
return -EBADF;
ret = -EINVAL;
- if (unlikely(!file->f_op->read_iter))
- return -EINVAL;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret < 0)
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
- if (!ret)
- aio_rw_done(req, call_read_iter(file, req, &iter));
+ if (!ret) {
+ if (likely(file->f_op->read_iter))
+ count = call_read_iter(file, req, &iter);
+ else
+ count = aio_iter_readv_writev(file, req, &iter, READ);
+ aio_rw_done(req, count);
+ }
kfree(iovec);
return ret;
}
@@ -1548,6 +1583,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
struct file *file;
+ ssize_t count;
int ret;
ret = aio_prep_rw(req, iocb);
@@ -1557,8 +1593,6 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
if (unlikely(!(file->f_mode & FMODE_WRITE)))
return -EBADF;
- if (unlikely(!file->f_op->write_iter))
- return -EINVAL;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret < 0)
@@ -1577,7 +1611,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
req->ki_flags |= IOCB_WRITE;
- aio_rw_done(req, call_write_iter(file, req, &iter));
+ if (likely(file->f_op->write_iter))
+ count = call_write_iter(file, req, &iter);
+ else
+ count = aio_iter_readv_writev(file, req, &iter, WRITE);
+ aio_rw_done(req, count);
}
kfree(iovec);
return ret;
--
2.18.0
On Thu, Jul 18, 2019 at 04:10:54PM -0700, Bjorn Andersson wrote:
> Implement a wrapper for aio_read()/write() to allow async IO on files
> not implementing the iter version of read/write, such as sysfs. This
> mimics how readv/writev uses non-iter ops in do_loop_readv_writev().
IDGI. How would that IO manage to be async? And what's the point
using aio in such situations in the first place?
On Thu 18 Jul 16:17 PDT 2019, Al Viro wrote:
> On Thu, Jul 18, 2019 at 04:10:54PM -0700, Bjorn Andersson wrote:
> > Implement a wrapper for aio_read()/write() to allow async IO on files
> > not implementing the iter version of read/write, such as sysfs. This
> > mimics how readv/writev uses non-iter ops in do_loop_readv_writev().
>
> IDGI. How would that IO manage to be async? And what's the point
> using aio in such situations in the first place?
The point is that an application using aio to submit io operations on a
set of files, can use the same mechanism to read/write files that
happens to be implemented by driver only implementing read/write (not
read_iter/write_iter) in the registered file_operations struct, such as
kernfs.
In this particular case I have a sysfs file that is accessing hardware
and hence will block for a while and using this patch I can io_submit()
a write and handle the completion of this in my normal event loop.
Each individual io operation will be just as synchronous as the current
iter-based mechanism - for the drivers that implement that.
Regards,
Bjorn
On Thu, Jul 18, 2019 at 04:43:52PM -0700, Bjorn Andersson wrote:
> On Thu 18 Jul 16:17 PDT 2019, Al Viro wrote:
>
> > On Thu, Jul 18, 2019 at 04:10:54PM -0700, Bjorn Andersson wrote:
> > > Implement a wrapper for aio_read()/write() to allow async IO on files
> > > not implementing the iter version of read/write, such as sysfs. This
> > > mimics how readv/writev uses non-iter ops in do_loop_readv_writev().
> >
> > IDGI. How would that IO manage to be async? And what's the point
> > using aio in such situations in the first place?
>
> The point is that an application using aio to submit io operations on a
> set of files,
... for no reason whatsoever, I take it?
> can use the same mechanism to read/write files that
> happens to be implemented by driver only implementing read/write (not
> read_iter/write_iter) in the registered file_operations struct, such as
> kernfs.
... except that it still has to support the kernels that don't have
your patch, so the fallback in userland is *not* going away.
On Thu, Jul 18, 2019 at 04:43:52PM -0700, Bjorn Andersson wrote:
> On Thu 18 Jul 16:17 PDT 2019, Al Viro wrote:
>
> > On Thu, Jul 18, 2019 at 04:10:54PM -0700, Bjorn Andersson wrote:
> > > Implement a wrapper for aio_read()/write() to allow async IO on files
> > > not implementing the iter version of read/write, such as sysfs. This
> > > mimics how readv/writev uses non-iter ops in do_loop_readv_writev().
> >
> > IDGI. How would that IO manage to be async? And what's the point
> > using aio in such situations in the first place?
>
> The point is that an application using aio to submit io operations on a
> set of files, can use the same mechanism to read/write files that
> happens to be implemented by driver only implementing read/write (not
> read_iter/write_iter) in the registered file_operations struct, such as
> kernfs.
>
> In this particular case I have a sysfs file that is accessing hardware
> and hence will block for a while and using this patch I can io_submit()
> a write and handle the completion of this in my normal event loop.
>
>
> Each individual io operation will be just as synchronous as the current
> iter-based mechanism - for the drivers that implement that.
Just adding the fops is not enough. I have patches floating around at
Solace that add thread based fallbacks for files that don't have an aio
read / write implementation, but I'm not working on that code any more.
The thread based methods were quite useful in applications that had a need
for using other kernel infrastructure in their main event loops.
-ben
> Regards,
> Bjorn
>
--
"Thought is the essence of where you are now."
On Thu 18 Jul 16:56 PDT 2019, Benjamin LaHaise wrote:
> On Thu, Jul 18, 2019 at 04:43:52PM -0700, Bjorn Andersson wrote:
> > On Thu 18 Jul 16:17 PDT 2019, Al Viro wrote:
> >
> > > On Thu, Jul 18, 2019 at 04:10:54PM -0700, Bjorn Andersson wrote:
> > > > Implement a wrapper for aio_read()/write() to allow async IO on files
> > > > not implementing the iter version of read/write, such as sysfs. This
> > > > mimics how readv/writev uses non-iter ops in do_loop_readv_writev().
> > >
> > > IDGI. How would that IO manage to be async? And what's the point
> > > using aio in such situations in the first place?
> >
> > The point is that an application using aio to submit io operations on a
> > set of files, can use the same mechanism to read/write files that
> > happens to be implemented by driver only implementing read/write (not
> > read_iter/write_iter) in the registered file_operations struct, such as
> > kernfs.
> >
> > In this particular case I have a sysfs file that is accessing hardware
> > and hence will block for a while and using this patch I can io_submit()
> > a write and handle the completion of this in my normal event loop.
> >
> >
> > Each individual io operation will be just as synchronous as the current
> > iter-based mechanism - for the drivers that implement that.
>
> Just adding the fops is not enough. I have patches floating around at
> Solace that add thread based fallbacks for files that don't have an aio
> read / write implementation, but I'm not working on that code any more.
My bad. Took another look and now I see the bigger picture of how this
is currently implemented and why just adding the fops would defeat the
purpose of the api.
Sorry for the noise.
> The thread based methods were quite useful in applications that had a need
> for using other kernel infrastructure in their main event loops.
>
Yes indeed.
Regards,
Bjorn