2024-02-09 21:22:42

by Joe Damato

[permalink] [raw]
Subject: [PATCH net-next v7 4/4] eventpoll: Add epoll ioctl for epoll_params

Add an ioctl for getting and setting epoll_params. User programs can use
this ioctl to get and set the busy poll usec time, packet budget, and
prefer busy poll params for a specific epoll context.

Parameters are limited:
- busy_poll_usecs is limited to <= S32_MAX
- busy_poll_budget is limited to <= NAPI_POLL_WEIGHT for unprivileged
users (!capable(CAP_NET_ADMIN))
- prefer_busy_poll must be 0 or 1
- __pad must be 0

Signed-off-by: Joe Damato <[email protected]>
Acked-by: Stanislav Fomichev <[email protected]>
Reviewed-by: Jiri Slaby <[email protected]>
---
.../userspace-api/ioctl/ioctl-number.rst | 1 +
fs/eventpoll.c | 72 +++++++++++++++++++
include/uapi/linux/eventpoll.h | 13 ++++
3 files changed, 86 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 457e16f06e04..b33918232f78 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -309,6 +309,7 @@ Code Seq# Include File Comments
0x89 0B-DF linux/sockios.h
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+0x8A 00-1F linux/eventpoll.h
0x8B all linux/wireless.h
0x8C 00-3F WiNRADiO driver
<http://www.winradio.com.au/>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1b8d01af0c2c..aa58d42737e6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -37,6 +37,7 @@
#include <linux/seq_file.h>
#include <linux/compat.h>
#include <linux/rculist.h>
+#include <linux/capability.h>
#include <net/busy_poll.h>

/*
@@ -494,6 +495,49 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
ep->napi_id = napi_id;
}

+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct eventpoll *ep = file->private_data;
+ void __user *uarg = (void __user *)arg;
+ struct epoll_params epoll_params;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
+ return -EFAULT;
+
+ /* pad byte must be zero */
+ if (epoll_params.__pad)
+ return -EINVAL;
+
+ if (epoll_params.busy_poll_usecs > S32_MAX)
+ return -EINVAL;
+
+ if (epoll_params.prefer_busy_poll > 1)
+ return -EINVAL;
+
+ if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
+ !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
+ ep->busy_poll_budget = epoll_params.busy_poll_budget;
+ ep->prefer_busy_poll = epoll_params.prefer_busy_poll;
+ return 0;
+ case EPIOCGPARAMS:
+ memset(&epoll_params, 0, sizeof(epoll_params));
+ epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
+ epoll_params.busy_poll_budget = ep->busy_poll_budget;
+ epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
+ if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
#else

static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -505,6 +549,12 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
{
}

+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_NET_RX_BUSY_POLL */

/*
@@ -864,6 +914,26 @@ static void ep_clear_and_put(struct eventpoll *ep)
ep_free(ep);
}

+static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ if (!is_file_epoll(file))
+ return -EINVAL;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ case EPIOCGPARAMS:
+ ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static int ep_eventpoll_release(struct inode *inode, struct file *file)
{
struct eventpoll *ep = file->private_data;
@@ -970,6 +1040,8 @@ static const struct file_operations eventpoll_fops = {
.release = ep_eventpoll_release,
.poll = ep_eventpoll_poll,
.llseek = noop_llseek,
+ .unlocked_ioctl = ep_eventpoll_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};

/*
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index cfbcc4cc49ac..4f4b948ef381 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -85,4 +85,17 @@ struct epoll_event {
__u64 data;
} EPOLL_PACKED;

+struct epoll_params {
+ __u32 busy_poll_usecs;
+ __u16 busy_poll_budget;
+ __u8 prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ __u8 __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+
#endif /* _UAPI_LINUX_EVENTPOLL_H */
--
2.25.1



2024-02-11 09:41:13

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH net-next v7 4/4] eventpoll: Add epoll ioctl for epoll_params

On Fri, Feb 9, 2024 at 10:15 PM Joe Damato <[email protected]> wrote:
>
> Add an ioctl for getting and setting epoll_params. User programs can use
> this ioctl to get and set the busy poll usec time, packet budget, and
> prefer busy poll params for a specific epoll context.
>
> Parameters are limited:
> - busy_poll_usecs is limited to <= s32_max
> - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
> users (!capable(CAP_NET_ADMIN))
> - prefer_busy_poll must be 0 or 1
> - __pad must be 0
>
> Signed-off-by: Joe Damato <[email protected]>
> Acked-by: Stanislav Fomichev <[email protected]>
> Reviewed-by: Jiri Slaby <[email protected]>
> ---
> .../userspace-api/ioctl/ioctl-number.rst | 1 +
> fs/eventpoll.c | 72 +++++++++++++++++++
> include/uapi/linux/eventpoll.h | 13 ++++
> 3 files changed, 86 insertions(+)
>
> diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
> index 457e16f06e04..b33918232f78 100644
> --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> @@ -309,6 +309,7 @@ Code Seq# Include File Comments
> 0x89 0B-DF linux/sockios.h
> 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
> 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
> +0x8A 00-1F linux/eventpoll.h
> 0x8B all linux/wireless.h
> 0x8C 00-3F WiNRADiO driver
> <http://www.winradio.com.au/>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 1b8d01af0c2c..aa58d42737e6 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -37,6 +37,7 @@
> #include <linux/seq_file.h>
> #include <linux/compat.h>
> #include <linux/rculist.h>
> +#include <linux/capability.h>
> #include <net/busy_poll.h>
>
> /*
> @@ -494,6 +495,49 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> ep->napi_id = napi_id;
> }
>
> +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct eventpoll *ep = file->private_data;
> + void __user *uarg = (void __user *)arg;
> + struct epoll_params epoll_params;
> +
> + switch (cmd) {
> + case EPIOCSPARAMS:
> + if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> + return -EFAULT;
> +
> + /* pad byte must be zero */
> + if (epoll_params.__pad)
> + return -EINVAL;
> +
> + if (epoll_params.busy_poll_usecs > S32_MAX)
> + return -EINVAL;
> +
> + if (epoll_params.prefer_busy_poll > 1)
> + return -EINVAL;
> +
> + if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> + !capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + ep->busy_poll_usecs = epoll_params.busy_poll_usecs;

You need WRITE_ONCE(ep->XXX, val) for all these settings.

> + ep->busy_poll_budget = epoll_params.busy_poll_budget;
> + ep->prefer_busy_poll = epoll_params.prefer_busy_poll;
> + return 0;
> + case EPIOCGPARAMS:
> + memset(&epoll_params, 0, sizeof(epoll_params));
> + epoll_params.busy_poll_usecs = ep->busy_poll_usecs;

You need to use READ_ONCE(ep->XXXXX) for the three reads.


> + epoll_params.busy_poll_budget = ep->busy_poll_budget;
> + epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
> + if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
> + return -EFAULT;
> + return 0;
> + default:
> + return -ENOIOCTLCMD;
> + }
> +}
>