2020-05-01 17:20:08

by Jens Axboe

[permalink] [raw]
Subject: [PATCH v2] eventfd: convert to f_op->read_iter()

eventfd is using ->read() as its file_operations read handler, but
this prevents passing in information about whether a given IO operation
is blocking or not. We can only use the file flags for that. To support
async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter()
support. Convert eventfd to using ->read_iter().

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Since v1:

- Add FMODE_NOWAIT to the eventfd file

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 78e41c7c3d05..d590c2141d39 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -216,10 +216,11 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
}
EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);

-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
+static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *iov)
{
+ struct file *file = iocb->ki_filp;
struct eventfd_ctx *ctx = file->private_data;
+ size_t count = iov_iter_count(iov);
ssize_t res;
__u64 ucnt = 0;
DECLARE_WAITQUEUE(wait, current);
@@ -231,7 +232,8 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
res = -EAGAIN;
if (ctx->count > 0)
res = sizeof(ucnt);
- else if (!(file->f_flags & O_NONBLOCK)) {
+ else if (!(file->f_flags & O_NONBLOCK) &&
+ !(iocb->ki_flags & IOCB_NOWAIT)) {
__add_wait_queue(&ctx->wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -257,7 +259,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
}
spin_unlock_irq(&ctx->wqh.lock);

- if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
+ if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)
return -EFAULT;

return res;
@@ -329,7 +331,7 @@ static const struct file_operations eventfd_fops = {
#endif
.release = eventfd_release,
.poll = eventfd_poll,
- .read = eventfd_read,
+ .read_iter = eventfd_read,
.write = eventfd_write,
.llseek = noop_llseek,
};
@@ -427,8 +429,17 @@ static int do_eventfd(unsigned int count, int flags)

fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
- if (fd < 0)
+ if (fd < 0) {
eventfd_free_ctx(ctx);
+ } else {
+ struct file *file;
+
+ file = fget(fd);
+ if (file) {
+ file->f_mode |= FMODE_NOWAIT;
+ fput(file);
+ }
+ }

return fd;
}

--
Jens Axboe


2020-05-01 17:45:25

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH v2] eventfd: convert to f_op->read_iter()

On Fri, May 01, 2020 at 11:18:05AM -0600, Jens Axboe wrote:

> - if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
> + if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)

*whoa*

It is correct, but only because here res > 0 <=> res == 8.
And that's not trivial at the first glance.

Please, turn that into something like

if (iov_iter_count(to) < sizeof(ucnt))
return -EINVAL;
spin_lock_irq(&ctx->wqh.lock);
if (!ctx->count) {
if (unlikely(file->f_flags & O_NONBLOCK) {
spin_unlock_irq(&ctx->wqh.lock)
return -EAGAIN;
}
__add_wait_queue(&ctx->wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (ctx->count)
break;
if (signal_pending(current)) {
spin_unlock_irq(&ctx->wqh.lock)
return -ERESTARTSYS;
}
spin_unlock_irq(&ctx->wqh.lock);
schedule();
spin_lock_irq(&ctx->wqh.lock);
}
__remove_wait_queue(&ctx->wqh, &wait);
__set_current_state(TASK_RUNNING);
}
eventfd_ctx_do_read(ctx, &ucnt);
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
spin_unlock_irq(&ctx->wqh.lock);
if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
return -EFAULT;
return sizeof(ucnt);

2020-05-01 17:51:02

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH v2] eventfd: convert to f_op->read_iter()

On 5/1/20 11:43 AM, Al Viro wrote:
> On Fri, May 01, 2020 at 11:18:05AM -0600, Jens Axboe wrote:
>
>> - if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
>> + if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)
>
> *whoa*
>
> It is correct, but only because here res > 0 <=> res == 8.
> And that's not trivial at the first glance.
>
> Please, turn that into something like

Looks good to me, just one minor edit:

> if (iov_iter_count(to) < sizeof(ucnt))
> return -EINVAL;
> spin_lock_irq(&ctx->wqh.lock);
> if (!ctx->count) {
> if (unlikely(file->f_flags & O_NONBLOCK) {
> spin_unlock_irq(&ctx->wqh.lock)
> return -EAGAIN;
> }
> __add_wait_queue(&ctx->wqh, &wait);
> for (;;) {
> set_current_state(TASK_INTERRUPTIBLE);
> if (ctx->count)
> break;
> if (signal_pending(current)) {
> spin_unlock_irq(&ctx->wqh.lock)
> return -ERESTARTSYS;
> }

We need to remove waitq and re-set task state here. I'll run a sanity
check on that and send out a v3.

--
Jens Axboe