In read/write ops, preincrement f_pos when no offset is specified, and
then attempt to fix up the position after the IO completes, if it
completed for fewer bytes than expected. This fixes the problem where
multiple queued-up IOs would all obtain the same f_pos and so perform
the same read/write.
This is still not as consistent as sync read/write, as it can advance
the file offset past the end of the file. Working around that
limitation - for example by tracking concurrent operations - looks like
it would cost a fair amount of performance, and the downside does not
seem too problematic.
The attempt to fix up f_pos afterwards at least means that when only a
single operation is in flight, the position stays consistent.
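
As an illustration of the pattern this fixes: several reads queued
against the same file with no explicit offset (sqe offset == -1, so the
file position is used). A minimal userspace sketch, assuming liburing's
prep helpers and a hypothetical "data.bin" input file; error handling
is elided (illustrative only, not part of this patch):

	#include <fcntl.h>
	#include <liburing.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_cqe *cqe;
		static char buf[2][64];
		int fd, i;

		io_uring_queue_init(8, &ring, 0);
		fd = open("data.bin", O_RDONLY);

		/*
		 * Queue two reads at offset -1: both consume the file
		 * position. Previously they could observe the same f_pos
		 * and read the same range; with the preincrement each
		 * reserves its own starting offset up front.
		 */
		for (i = 0; i < 2; i++) {
			struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

			io_uring_prep_read(sqe, fd, buf[i], sizeof(buf[i]), -1);
		}
		io_uring_submit(&ring);

		for (i = 0; i < 2; i++) {
			io_uring_wait_cqe(&ring, &cqe);
			printf("read returned %d\n", cqe->res);
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}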
Co-developed-by: Jens Axboe <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Dylan Yudaken <[email protected]>
---
fs/io_uring.c | 81 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 68 insertions(+), 13 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index abd8c739988e..a951d0754899 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3066,21 +3066,71 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
}
}
-static inline loff_t*
-io_kiocb_update_pos(struct io_kiocb *req, struct kiocb *kiocb)
+static inline bool
+io_kiocb_update_pos(struct io_kiocb *req, struct kiocb *kiocb,
+ loff_t **ppos, u64 expected, bool force_nonblock)
{
bool is_stream = req->file->f_mode & FMODE_STREAM;
if (kiocb->ki_pos == -1) {
if (!is_stream) {
- req->flags |= REQ_F_CUR_POS;
+ *ppos = &kiocb->ki_pos;
+ WARN_ON(req->flags & REQ_F_CUR_POS);
+ if (req->file->f_mode & FMODE_ATOMIC_POS) {
+ if (force_nonblock) {
+ if (!mutex_trylock(&req->file->f_pos_lock))
+ return true;
+ } else {
+ mutex_lock(&req->file->f_pos_lock);
+ }
+ }
kiocb->ki_pos = req->file->f_pos;
- return &kiocb->ki_pos;
+ req->flags |= REQ_F_CUR_POS;
+ req->file->f_pos += expected;
+ if (req->file->f_mode & FMODE_ATOMIC_POS)
+ mutex_unlock(&req->file->f_pos_lock);
+ return false;
} else {
kiocb->ki_pos = 0;
- return NULL;
+ *ppos = NULL;
+ return false;
}
}
- return is_stream ? NULL : &kiocb->ki_pos;
+ *ppos = is_stream ? NULL : &kiocb->ki_pos;
+ return false;
+}
+
+static inline void
+io_kiocb_done_pos(struct io_kiocb *req, struct kiocb *kiocb, u64 actual)
+{
+ u64 expected;
+
+ if (likely(!(req->flags & REQ_F_CUR_POS)))
+ return;
+
+ expected = req->rw.len;
+ if (actual >= expected)
+ return;
+
+	/*
+	 * It is not necessarily safe to block on f_pos_lock here; assume
+	 * that if the trylock fails the position is being changed, and if
+	 * it is changing we cannot usefully update it anyway.
+	 */
+	if ((req->file->f_mode & FMODE_ATOMIC_POS) &&
+	    !mutex_trylock(&req->file->f_pos_lock))
+ return;
+
+	/*
+	 * Move f_pos back, but only if everything is still exactly as the
+	 * preincrement above left it.
+	 */
+ if (req->file->f_pos == kiocb->ki_pos + (expected - actual))
+ req->file->f_pos = kiocb->ki_pos;
+
+ /* else something else messed with f_pos and we can't do anything */
+
+ if (req->file->f_mode & FMODE_ATOMIC_POS)
+ mutex_unlock(&req->file->f_pos_lock);
}
static void kiocb_done(struct io_kiocb *req, ssize_t ret,
@@ -3096,8 +3146,7 @@ static void kiocb_done(struct io_kiocb *req, ssize_t ret,
ret += io->bytes_done;
}
- if (req->flags & REQ_F_CUR_POS)
- req->file->f_pos = req->rw.kiocb.ki_pos;
+ io_kiocb_done_pos(req, &req->rw.kiocb, ret >= 0 ? ret : 0);
if (ret >= 0 && (req->rw.kiocb.ki_complete == io_complete_rw))
__io_complete_rw(req, ret, issue_flags);
else
@@ -3662,21 +3711,23 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
- if (unlikely(!io_file_supports_nowait(req))) {
+ if (unlikely(!io_file_supports_nowait(req) ||
+ io_kiocb_update_pos(req, kiocb, &ppos,
+ req->rw.len, true))) {
ret = io_setup_async_rw(req, iovec, s, true);
return ret ?: -EAGAIN;
}
kiocb->ki_flags |= IOCB_NOWAIT;
} else {
+ io_kiocb_update_pos(req, kiocb, &ppos, req->rw.len, false);
/* Ensure we clear previously set non-block flag */
kiocb->ki_flags &= ~IOCB_NOWAIT;
}
- ppos = io_kiocb_update_pos(req, kiocb);
-
ret = rw_verify_area(READ, req->file, ppos, req->result);
if (unlikely(ret)) {
kfree(iovec);
+ io_kiocb_done_pos(req, kiocb, 0);
return ret;
}
@@ -3798,14 +3849,17 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
(req->flags & REQ_F_ISREG))
goto copy_iov;
+ /* if we cannot lock the file position then punt */
+ if (unlikely(io_kiocb_update_pos(req, kiocb, &ppos, req->rw.len, true)))
+ goto copy_iov;
+
kiocb->ki_flags |= IOCB_NOWAIT;
} else {
+ io_kiocb_update_pos(req, kiocb, &ppos, req->rw.len, false);
/* Ensure we clear previously set non-block flag */
kiocb->ki_flags &= ~IOCB_NOWAIT;
}
- ppos = io_kiocb_update_pos(req, kiocb);
-
ret = rw_verify_area(WRITE, req->file, ppos, req->result);
if (unlikely(ret))
goto out_free;
@@ -3858,6 +3912,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
return ret ?: -EAGAIN;
}
out_free:
+ io_kiocb_done_pos(req, kiocb, 0);
/* it's reportedly faster than delegating the null check to kfree() */
if (iovec)
kfree(iovec);
--
2.30.2
On 2/21/22 22:16, Dylan Yudaken wrote:
> In read/write ops, preincrement f_pos when no offset is specified, and
> then attempt to fix up the position after the IO completes, if it
> completed for fewer bytes than expected. This fixes the problem where
> multiple queued-up IOs would all obtain the same f_pos and so perform
> the same read/write.
> [...]
>
It's a little bit weird when a read req returns x bytes while f_pos
moves ahead y bytes, where x isn't equal to y. I don't know whether
this causes problems...
On Tue, 2022-02-22 at 15:34 +0800, Hao Xu wrote:
>
> On 2/21/22 22:16, Dylan Yudaken wrote:
> > [...]
> >
> It's a little bit weird when a read req returns x bytes while f_pos
> moves ahead y bytes, where x isn't equal to y. I don't know whether
> this causes problems...
>
It seems to be ok - as in nothing crashes when f_pos is past the end of
the file - but I really am not an expert on these things, so I am happy
to receive feedback on this.
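
To make the fixup rule concrete, here is a minimal userspace model of
the scheme (my own sketch, not kernel code), with a plain pthread mutex
standing in for f_pos_lock. Note that the patch's check

	f_pos == ki_pos + (expected - actual)

is the same as "f_pos == start + expected" below, since ki_pos has
already advanced to start + actual by completion time:

	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>

	struct model_file {
		pthread_mutex_t pos_lock;
		uint64_t f_pos;
	};

	/* Reserve [start, start + expected) and preincrement f_pos. */
	static uint64_t model_update_pos(struct model_file *f, uint64_t expected)
	{
		uint64_t start;

		pthread_mutex_lock(&f->pos_lock);
		start = f->f_pos;
		f->f_pos += expected;
		pthread_mutex_unlock(&f->pos_lock);
		return start;
	}

	/* On a short IO, roll f_pos back iff nothing else moved it. */
	static void model_done_pos(struct model_file *f, uint64_t start,
				   uint64_t expected, uint64_t actual)
	{
		if (actual >= expected)
			return;
		if (pthread_mutex_trylock(&f->pos_lock) != 0)
			return; /* contended: someone else owns f_pos */
		if (f->f_pos == start + expected)
			f->f_pos = start + actual;
		pthread_mutex_unlock(&f->pos_lock);
	}

	int main(void)
	{
		struct model_file f = { PTHREAD_MUTEX_INITIALIZER, 0 };
		uint64_t a, b;

		/* 100-byte file, two 64-byte reads with no offset. */
		a = model_update_pos(&f, 64);	/* a = 0,  f_pos = 64  */
		b = model_update_pos(&f, 64);	/* b = 64, f_pos = 128 */
		model_done_pos(&f, a, 64, 64);	/* full read, no fixup */
		model_done_pos(&f, b, 64, 36);	/* short read: 128 -> 100 */
		printf("f_pos = %llu\n", (unsigned long long)f.f_pos);
		return 0;
	}

If instead the first read had come up short while the second was still
queued, its fixup check would fail (f_pos has already moved on) and
f_pos would stay at 128, past EOF - which is exactly the inconsistency
discussed above.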