Subject: [PATCH] eventfd: protect eventfd_wake_count with a local_lock

eventfd_signal() assumes that the critical section between
spin_lock_irqsave() and spin_unlock_irqrestore() is non-preemptible, and
therefore increments and decrements the percpu variable
eventfd_wake_count inside that critical section.

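This obviously does not fly with PREEMPT_RT, where spinlock_t is a
sleeping lock and the critical section can be preempted. If
eventfd_signal() is preempted while the count is raised and an unrelated
thread on the same CPU calls eventfd_signal(), the result is a spurious
WARN. To avoid this, protect the percpu variable with a local_lock.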

Reported-by: Daniel Bristot de Oliveira <[email protected]>
Fixes: b5e683d5cab8 ("eventfd: track eventfd_signal() recursion depth")
Cc: He Zhe <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Co-developed-by: Paolo Bonzini <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
fs/eventfd.c | 27 ++++++++++++++++++++++-----
include/linux/eventfd.h | 7 +------
2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6dd4f34..9754fcd38690 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -12,6 +12,7 @@
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/kernel.h>
+#include <linux/local_lock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -25,8 +26,6 @@
#include <linux/idr.h>
#include <linux/uio.h>

-DEFINE_PER_CPU(int, eventfd_wake_count);
-
static DEFINE_IDA(eventfd_ida);

struct eventfd_ctx {
@@ -45,6 +44,20 @@ struct eventfd_ctx {
int id;
};

+struct event_fd_recursion {
+ local_lock_t lock;
+ int count;
+};
+
+static DEFINE_PER_CPU(struct event_fd_recursion, event_fd_recursion) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
+
+bool eventfd_signal_count(void)
+{
+ return this_cpu_read(event_fd_recursion.count);
+}
+
/**
* eventfd_signal - Adds @n to the eventfd counter.
* @ctx: [in] Pointer to the eventfd context.
@@ -71,18 +84,22 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
* it returns true, the eventfd_signal() call should be deferred to a
* safe context.
*/
- if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+ local_lock(&event_fd_recursion.lock);
+ if (WARN_ON_ONCE(this_cpu_read(event_fd_recursion.count))) {
+ local_unlock(&event_fd_recursion.lock);
return 0;
+ }

spin_lock_irqsave(&ctx->wqh.lock, flags);
- this_cpu_inc(eventfd_wake_count);
+ this_cpu_inc(event_fd_recursion.count);
if (ULLONG_MAX - ctx->count < n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
- this_cpu_dec(eventfd_wake_count);
+ this_cpu_dec(event_fd_recursion.count);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+ local_unlock(&event_fd_recursion.lock);

return n;
}
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index fa0a524baed0..ca89d6c409c1 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -43,12 +43,7 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);

-DECLARE_PER_CPU(int, eventfd_wake_count);
-
-static inline bool eventfd_signal_count(void)
-{
- return this_cpu_read(eventfd_wake_count);
-}
+bool eventfd_signal_count(void);

#else /* CONFIG_EVENTFD */

--
2.31.1


2021-07-23 14:55:17

by Nicolas Saenz Julienne

[permalink] [raw]
Subject: Re: [PATCH] eventfd: protect eventfd_wake_count with a local_lock

On Mon, 19 Jul 2021 09:54:52 +0200, Daniel Bristot de Oliveira wrote:
> eventfd_signal assumes that spin_lock_irqsave/spin_unlock_irqrestore is
> non-preemptable and therefore increments and decrements the percpu
> variable inside the critical section.
>
> This obviously does not fly with PREEMPT_RT. If eventfd_signal is
> preempted and an unrelated thread calls eventfd_signal, the result is
> a spurious WARN. To avoid this, protect the percpu variable with a
> local_lock.
>
> Reported-by: Daniel Bristot de Oliveira <[email protected]>
> Fixes: b5e683d5cab8 ("eventfd: track eventfd_signal() recursion depth")
> Cc: He Zhe <[email protected]>
> Cc: Jens Axboe <[email protected]>
> Cc: Alexander Viro <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Sebastian Andrzej Siewior <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Co-developed-by: Paolo Bonzini <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
> ---

Tested-by: Nicolas Saenz Julienne <[email protected]>

Thanks!

--
Nicolás Sáenz