2020-10-26 19:33:53

by Willem de Bruijn

[permalink] [raw]
Subject: [PATCH] epoll: add nsec timeout support

From: Willem de Bruijn <[email protected]>

The underlying hrtimer is programmed with nanosecond resolution.

Use cases such as datacenter networking operate on timescales well
below milliseconds. Setting shorter timeouts bounds tail latency.

Add epoll_create1 flag EPOLL_NSTIMEO. When passed, this changes the
interpretation of argument timeout in epoll_wait from msec to nsec.

The new eventpoll state fits in existing 4B of padding when busy poll
is compiled in (the default), and reads the same cacheline.

Signed-off-by: Willem de Bruijn <[email protected]>

---

A selftest is available on GitHub for now; I can follow up with a kselftests version.
https://github.com/wdebruij/kerneltools/blob/master/tests/epoll_nstimeo.c
---
fs/eventpoll.c | 26 +++++++++++++++++++-------
include/uapi/linux/eventpoll.h | 1 +
2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4df61129566d..1216b909d155 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -225,6 +225,9 @@ struct eventpoll {
unsigned int napi_id;
#endif

+ /* Accept timeout in ns resolution (EPOLL_NSTIMEO) */
+ unsigned int nstimeout:1;
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* tracks wakeup nests for lockdep validation */
u8 nests;
@@ -1787,17 +1790,20 @@ static int ep_send_events(struct eventpoll *ep,
return esed.res;
}

-static inline struct timespec64 ep_set_mstimeout(long ms)
+static inline struct timespec64 ep_set_nstimeout(long ns)
{
- struct timespec64 now, ts = {
- .tv_sec = ms / MSEC_PER_SEC,
- .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
- };
+ struct timespec64 now, ts;

+ ts = ns_to_timespec64(ns);
ktime_get_ts64(&now);
return timespec64_add_safe(now, ts);
}

+static inline struct timespec64 ep_set_mstimeout(long ms)
+{
+ return ep_set_nstimeout(NSEC_PER_MSEC * ms);
+}
+
/**
* ep_poll - Retrieves ready events, and delivers them to the caller supplied
* event buffer.
@@ -1826,7 +1832,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
lockdep_assert_irqs_enabled();

if (timeout > 0) {
- struct timespec64 end_time = ep_set_mstimeout(timeout);
+ struct timespec64 end_time;
+
+ end_time = ep->nstimeout ? ep_set_nstimeout(timeout) :
+ ep_set_mstimeout(timeout);

slack = select_estimate_accuracy(&end_time);
to = &expires;
@@ -2046,7 +2055,7 @@ static int do_epoll_create(int flags)
/* Check the EPOLL_* constant for consistency. */
BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);

- if (flags & ~EPOLL_CLOEXEC)
+ if (flags & ~(EPOLL_CLOEXEC | EPOLL_NSTIMEO))
return -EINVAL;
/*
* Create the internal data structure ("struct eventpoll").
@@ -2054,6 +2063,9 @@ static int do_epoll_create(int flags)
error = ep_alloc(&ep);
if (error < 0)
return error;
+
+ ep->nstimeout = !!(flags & EPOLL_NSTIMEO);
+
/*
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 8a3432d0f0dc..f6ef9c9f8ac2 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -21,6 +21,7 @@

/* Flags for epoll_create1. */
#define EPOLL_CLOEXEC O_CLOEXEC
+#define EPOLL_NSTIMEO 0x1

/* Valid opcodes to issue to sys_epoll_ctl() */
#define EPOLL_CTL_ADD 1
--
2.29.0.rc1.297.gfa9743e501-goog


2020-10-26 21:01:55

by Soheil Hassas Yeganeh

[permalink] [raw]
Subject: Re: [PATCH] epoll: add nsec timeout support

On Mon, Oct 26, 2020 at 12:08 PM Willem de Bruijn
<[email protected]> wrote:
>
> From: Willem de Bruijn <[email protected]>
>
> The underlying hrtimer is programmed with nanosecond resolution.
>
> Use cases such as datacenter networking operate on timescales well
> below milliseconds. Setting shorter timeouts bounds tail latency.
>
> Add epoll_create1 flag EPOLL_NSTIMEO. When passed, this changes the
> interpretation of argument timeout in epoll_wait from msec to nsec.
>
> The new eventpoll state fits in existing 4B of padding when busy poll
> is compiled in (the default), and reads the same cacheline.
>
> Signed-off-by: Willem de Bruijn <[email protected]>

Acked-by: Soheil Hassas Yeganeh <[email protected]>

Thanks for adding the feature!

> ---
>
> Selftest for now at github. Can follow-up for kselftests.
> https://github.com/wdebruij/kerneltools/blob/master/tests/epoll_nstimeo.c
> ---
> fs/eventpoll.c | 26 +++++++++++++++++++-------
> include/uapi/linux/eventpoll.h | 1 +
> 2 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 4df61129566d..1216b909d155 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -225,6 +225,9 @@ struct eventpoll {
> unsigned int napi_id;
> #endif
>
> + /* Accept timeout in ns resolution (EPOLL_NSTIMEO) */
> + unsigned int nstimeout:1;
> +
> #ifdef CONFIG_DEBUG_LOCK_ALLOC
> /* tracks wakeup nests for lockdep validation */
> u8 nests;
> @@ -1787,17 +1790,20 @@ static int ep_send_events(struct eventpoll *ep,
> return esed.res;
> }
>
> -static inline struct timespec64 ep_set_mstimeout(long ms)
> +static inline struct timespec64 ep_set_nstimeout(long ns)
> {
> - struct timespec64 now, ts = {
> - .tv_sec = ms / MSEC_PER_SEC,
> - .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
> - };
> + struct timespec64 now, ts;
>
> + ts = ns_to_timespec64(ns);
> ktime_get_ts64(&now);
> return timespec64_add_safe(now, ts);
> }
>
> +static inline struct timespec64 ep_set_mstimeout(long ms)
> +{
> + return ep_set_nstimeout(NSEC_PER_MSEC * ms);
> +}
> +
> /**
> * ep_poll - Retrieves ready events, and delivers them to the caller supplied
> * event buffer.
> @@ -1826,7 +1832,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
> lockdep_assert_irqs_enabled();
>
> if (timeout > 0) {
> - struct timespec64 end_time = ep_set_mstimeout(timeout);
> + struct timespec64 end_time;
> +
> + end_time = ep->nstimeout ? ep_set_nstimeout(timeout) :
> + ep_set_mstimeout(timeout);
>
> slack = select_estimate_accuracy(&end_time);
> to = &expires;
> @@ -2046,7 +2055,7 @@ static int do_epoll_create(int flags)
> /* Check the EPOLL_* constant for consistency. */
> BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
>
> - if (flags & ~EPOLL_CLOEXEC)
> + if (flags & ~(EPOLL_CLOEXEC | EPOLL_NSTIMEO))
> return -EINVAL;
> /*
> * Create the internal data structure ("struct eventpoll").
> @@ -2054,6 +2063,9 @@ static int do_epoll_create(int flags)
> error = ep_alloc(&ep);
> if (error < 0)
> return error;
> +
> + ep->nstimeout = !!(flags & EPOLL_NSTIMEO);
> +
> /*
> * Creates all the items needed to setup an eventpoll file. That is,
> * a file structure and a free file descriptor.
> diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
> index 8a3432d0f0dc..f6ef9c9f8ac2 100644
> --- a/include/uapi/linux/eventpoll.h
> +++ b/include/uapi/linux/eventpoll.h
> @@ -21,6 +21,7 @@
>
> /* Flags for epoll_create1. */
> #define EPOLL_CLOEXEC O_CLOEXEC
> +#define EPOLL_NSTIMEO 0x1
>
> /* Valid opcodes to issue to sys_epoll_ctl() */
> #define EPOLL_CTL_ADD 1
> --
> 2.29.0.rc1.297.gfa9743e501-goog
>