Add an ioctl for getting and setting epoll_params. User programs can use
this ioctl to get and set the busy poll usec time or packet budget
params for a specific epoll context.
Signed-off-by: Joe Damato <[email protected]>
---
.../userspace-api/ioctl/ioctl-number.rst | 1 +
fs/eventpoll.c | 64 +++++++++++++++++++
include/uapi/linux/eventpoll.h | 12 ++++
3 files changed, 77 insertions(+)
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 457e16f06e04..b33918232f78 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -309,6 +309,7 @@ Code Seq# Include File Comments
0x89 0B-DF linux/sockios.h
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+0x8A 00-1F linux/eventpoll.h
0x8B all linux/wireless.h
0x8C 00-3F WiNRADiO driver
<http://www.winradio.com.au/>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 40bd97477b91..73ae886efb8a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -6,6 +6,8 @@
* Davide Libenzi <[email protected]>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
@@ -37,6 +39,7 @@
#include <linux/seq_file.h>
#include <linux/compat.h>
#include <linux/rculist.h>
+#include <linux/capability.h>
#include <net/busy_poll.h>
/*
@@ -495,6 +498,39 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
ep->napi_id = napi_id;
}
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct eventpoll *ep;
+ struct epoll_params epoll_params;
+ void __user *uarg = (void __user *) arg;
+
+ ep = file->private_data;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
+ return -EFAULT;
+
+ if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
+ !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
+ ep->busy_poll_budget = epoll_params.busy_poll_budget;
+ return 0;
+ case EPIOCGPARAMS:
+ memset(&epoll_params, 0, sizeof(epoll_params));
+ epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
+ epoll_params.busy_poll_budget = ep->busy_poll_budget;
+ if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
#else
static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -510,6 +546,12 @@ static inline bool ep_busy_loop_on(struct eventpoll *ep)
{
return false;
}
+
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_NET_RX_BUSY_POLL */
/*
@@ -869,6 +911,26 @@ static void ep_clear_and_put(struct eventpoll *ep)
ep_free(ep);
}
+static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ if (!is_file_epoll(file))
+ return -EINVAL;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ case EPIOCGPARAMS:
+ ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static int ep_eventpoll_release(struct inode *inode, struct file *file)
{
struct eventpoll *ep = file->private_data;
@@ -975,6 +1037,8 @@ static const struct file_operations eventpoll_fops = {
.release = ep_eventpoll_release,
.poll = ep_eventpoll_poll,
.llseek = noop_llseek,
+ .unlocked_ioctl = ep_eventpoll_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/*
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index cfbcc4cc49ac..8eb0fdbce995 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -85,4 +85,16 @@ struct epoll_event {
__u64 data;
} EPOLL_PACKED;
+struct epoll_params {
+ u64 busy_poll_usecs;
+ u16 busy_poll_budget;
+
+ /* for future fields */
+ u8 data[118];
+} EPOLL_PACKED;
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+
#endif /* _UAPI_LINUX_EVENTPOLL_H */
--
2.25.1
On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> +struct epoll_params {
> + u64 busy_poll_usecs;
> + u16 busy_poll_budget;
> +
> + /* for future fields */
> + u8 data[118];
> +} EPOLL_PACKED;
variables that cross the user/kernel boundary need to be __u64, __u16,
and __u8 here.
And why 118?
thanks,
greg k-h
On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> Add an ioctl for getting and setting epoll_params. User programs can use
> this ioctl to get and set the busy poll usec time or packet budget
> params for a specific epoll context.
>
> Signed-off-by: Joe Damato <[email protected]>
> ---
> .../userspace-api/ioctl/ioctl-number.rst | 1 +
> fs/eventpoll.c | 64 +++++++++++++++++++
> include/uapi/linux/eventpoll.h | 12 ++++
> 3 files changed, 77 insertions(+)
>
> diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
> index 457e16f06e04..b33918232f78 100644
> --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> @@ -309,6 +309,7 @@ Code Seq# Include File Comments
> 0x89 0B-DF linux/sockios.h
> 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
> 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
> +0x8A 00-1F linux/eventpoll.h
> 0x8B all linux/wireless.h
> 0x8C 00-3F WiNRADiO driver
> <http://www.winradio.com.au/>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 40bd97477b91..73ae886efb8a 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -6,6 +6,8 @@
> * Davide Libenzi <[email protected]>
> */
>
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> #include <linux/init.h>
> #include <linux/kernel.h>
> #include <linux/sched/signal.h>
> @@ -37,6 +39,7 @@
> #include <linux/seq_file.h>
> #include <linux/compat.h>
> #include <linux/rculist.h>
> +#include <linux/capability.h>
> #include <net/busy_poll.h>
>
> /*
> @@ -495,6 +498,39 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> ep->napi_id = napi_id;
> }
>
> +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct eventpoll *ep;
> + struct epoll_params epoll_params;
> + void __user *uarg = (void __user *) arg;
> +
> + ep = file->private_data;
> +
> + switch (cmd) {
> + case EPIOCSPARAMS:
> + if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> + return -EFAULT;
> +
> + if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> + !capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
> + ep->busy_poll_budget = epoll_params.busy_poll_budget;
> + return 0;
> + case EPIOCGPARAMS:
> + memset(&epoll_params, 0, sizeof(epoll_params));
> + epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
> + epoll_params.busy_poll_budget = ep->busy_poll_budget;
> + if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
> + return -EFAULT;
> + return 0;
> + default:
> + return -ENOIOCTLCMD;
> + }
> +}
> +
> #else
>
> static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
> @@ -510,6 +546,12 @@ static inline bool ep_busy_loop_on(struct eventpoll *ep)
> {
> return false;
> }
> +
> +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + return -EOPNOTSUPP;
> +}
> #endif /* CONFIG_NET_RX_BUSY_POLL */
>
> /*
> @@ -869,6 +911,26 @@ static void ep_clear_and_put(struct eventpoll *ep)
> ep_free(ep);
> }
>
> +static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> + int ret;
> +
> + if (!is_file_epoll(file))
> + return -EINVAL;
> +
> + switch (cmd) {
> + case EPIOCSPARAMS:
> + case EPIOCGPARAMS:
> + ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
> + break;
> + default:
> + ret = -EINVAL;
> + break;
> + }
> +
> + return ret;
> +}
> +
> static int ep_eventpoll_release(struct inode *inode, struct file *file)
> {
> struct eventpoll *ep = file->private_data;
> @@ -975,6 +1037,8 @@ static const struct file_operations eventpoll_fops = {
> .release = ep_eventpoll_release,
> .poll = ep_eventpoll_poll,
> .llseek = noop_llseek,
> + .unlocked_ioctl = ep_eventpoll_ioctl,
> + .compat_ioctl = compat_ptr_ioctl,
> };
>
> /*
> diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
> index cfbcc4cc49ac..8eb0fdbce995 100644
> --- a/include/uapi/linux/eventpoll.h
> +++ b/include/uapi/linux/eventpoll.h
> @@ -85,4 +85,16 @@ struct epoll_event {
> __u64 data;
> } EPOLL_PACKED;
>
> +struct epoll_params {
> + u64 busy_poll_usecs;
> + u16 busy_poll_budget;
> +
> + /* for future fields */
> + u8 data[118];
You forgot to validate that "data" is set to 0, which means that this
would be useless. Why have this at all?
thanks,
greg k-h
On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -6,6 +6,8 @@
> * Davide Libenzi <[email protected]>
> */
>
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
Why this addition? You do not add any pr_*() calls in this patch at all
that I can see.
thanks,
greg k-h
On Thu, Jan 25, 2024 at 03:22:34PM -0800, Greg Kroah-Hartman wrote:
> On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > Add an ioctl for getting and setting epoll_params. User programs can use
> > this ioctl to get and set the busy poll usec time or packet budget
> > params for a specific epoll context.
> >
> > Signed-off-by: Joe Damato <[email protected]>
> > ---
> > .../userspace-api/ioctl/ioctl-number.rst | 1 +
> > fs/eventpoll.c | 64 +++++++++++++++++++
> > include/uapi/linux/eventpoll.h | 12 ++++
> > 3 files changed, 77 insertions(+)
> >
> > diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
> > index 457e16f06e04..b33918232f78 100644
> > --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> > +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> > @@ -309,6 +309,7 @@ Code Seq# Include File Comments
> > 0x89 0B-DF linux/sockios.h
> > 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
> > 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
> > +0x8A 00-1F linux/eventpoll.h
> > 0x8B all linux/wireless.h
> > 0x8C 00-3F WiNRADiO driver
> > <http://www.winradio.com.au/>
> > diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> > index 40bd97477b91..73ae886efb8a 100644
> > --- a/fs/eventpoll.c
> > +++ b/fs/eventpoll.c
> > @@ -6,6 +6,8 @@
> > * Davide Libenzi <[email protected]>
> > */
> >
> > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > +
> > #include <linux/init.h>
> > #include <linux/kernel.h>
> > #include <linux/sched/signal.h>
> > @@ -37,6 +39,7 @@
> > #include <linux/seq_file.h>
> > #include <linux/compat.h>
> > #include <linux/rculist.h>
> > +#include <linux/capability.h>
> > #include <net/busy_poll.h>
> >
> > /*
> > @@ -495,6 +498,39 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> > ep->napi_id = napi_id;
> > }
> >
> > +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> > + unsigned long arg)
> > +{
> > + struct eventpoll *ep;
> > + struct epoll_params epoll_params;
> > + void __user *uarg = (void __user *) arg;
> > +
> > + ep = file->private_data;
> > +
> > + switch (cmd) {
> > + case EPIOCSPARAMS:
> > + if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> > + return -EFAULT;
> > +
> > + if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> > + !capable(CAP_NET_ADMIN))
> > + return -EPERM;
> > +
> > + ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
> > + ep->busy_poll_budget = epoll_params.busy_poll_budget;
> > + return 0;
> > + case EPIOCGPARAMS:
> > + memset(&epoll_params, 0, sizeof(epoll_params));
> > + epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
> > + epoll_params.busy_poll_budget = ep->busy_poll_budget;
> > + if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
> > + return -EFAULT;
> > + return 0;
> > + default:
> > + return -ENOIOCTLCMD;
> > + }
> > +}
> > +
> > #else
> >
> > static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
> > @@ -510,6 +546,12 @@ static inline bool ep_busy_loop_on(struct eventpoll *ep)
> > {
> > return false;
> > }
> > +
> > +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> > + unsigned long arg)
> > +{
> > + return -EOPNOTSUPP;
> > +}
> > #endif /* CONFIG_NET_RX_BUSY_POLL */
> >
> > /*
> > @@ -869,6 +911,26 @@ static void ep_clear_and_put(struct eventpoll *ep)
> > ep_free(ep);
> > }
> >
> > +static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> > +{
> > + int ret;
> > +
> > + if (!is_file_epoll(file))
> > + return -EINVAL;
> > +
> > + switch (cmd) {
> > + case EPIOCSPARAMS:
> > + case EPIOCGPARAMS:
> > + ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
> > + break;
> > + default:
> > + ret = -EINVAL;
> > + break;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > static int ep_eventpoll_release(struct inode *inode, struct file *file)
> > {
> > struct eventpoll *ep = file->private_data;
> > @@ -975,6 +1037,8 @@ static const struct file_operations eventpoll_fops = {
> > .release = ep_eventpoll_release,
> > .poll = ep_eventpoll_poll,
> > .llseek = noop_llseek,
> > + .unlocked_ioctl = ep_eventpoll_ioctl,
> > + .compat_ioctl = compat_ptr_ioctl,
> > };
> >
> > /*
> > diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
> > index cfbcc4cc49ac..8eb0fdbce995 100644
> > --- a/include/uapi/linux/eventpoll.h
> > +++ b/include/uapi/linux/eventpoll.h
> > @@ -85,4 +85,16 @@ struct epoll_event {
> > __u64 data;
> > } EPOLL_PACKED;
> >
> > +struct epoll_params {
> > + u64 busy_poll_usecs;
> > + u16 busy_poll_budget;
> > +
> > + /* for future fields */
> > + u8 data[118];
>
> You forgot to validate that "data" is set to 0, which means that this
> would be useless. Why have this at all?
I included this because I (probably incorrectly) thought that there should
be some extra space in the struct for future additions if needed.
I am not sure if that is a recommended practice for this sort of thing or
not, but if it is I can add some validation.
Thanks for your time and effort in reviewing my code.
On Thu, Jan 25, 2024 at 03:20:37PM -0800, Greg Kroah-Hartman wrote:
> On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > --- a/fs/eventpoll.c
> > +++ b/fs/eventpoll.c
> > @@ -6,6 +6,8 @@
> > * Davide Libenzi <[email protected]>
> > */
> >
> > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > +
>
> Why this addition? You do not add any pr_*() calls in this patch at all
> that I can see.
Thanks, I've removed this for the next version. It was a remnant from a
previous version.
On Thu, Jan 25, 2024 at 04:11:28PM -0800, Joe Damato wrote:
> On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
> > On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > > +struct epoll_params {
> > > + u64 busy_poll_usecs;
> > > + u16 busy_poll_budget;
> > > +
> > > + /* for future fields */
> > > + u8 data[118];
> > > +} EPOLL_PACKED;
> >
> > > variables that cross the user/kernel boundary need to be __u64, __u16,
> > and __u8 here.
>
> I'll make that change for the next version, thank you.
>
> > And why 118?
>
> I chose this arbitrarily. I figured that a 128 byte struct would support 16
> u64s in the event that other fields needed to be added in the future. 118
> is what was left after the existing fields. There's almost certainly a
> better way to do this - or perhaps it is unnecessary as per your other
> message.
>
> I am not sure if leaving extra space in the struct is a recommended
> practice for ioctls or not - I thought I noticed some code that did and
> some that didn't in the kernel so I err'd on the side of leaving the space
> and probably did it in the worst way possible.
It's not really a good idea unless you know exactly what you are going
to do with it. Why not just have a new ioctl if you need new
information in the future? That's simpler, right?
thanks,
greg k-h
On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
> On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > +struct epoll_params {
> > + u64 busy_poll_usecs;
> > + u16 busy_poll_budget;
> > +
> > + /* for future fields */
> > + u8 data[118];
> > +} EPOLL_PACKED;
>
> variables that cross the user/kernel boundary need to be __u64, __u16,
> and __u8 here.
I'll make that change for the next version, thank you.
> And why 118?
I chose this arbitrarily. I figured that a 128 byte struct would support 16
u64s in the event that other fields needed to be added in the future. 118
is what was left after the existing fields. There's almost certainly a
better way to do this - or perhaps it is unnecessary as per your other
message.
I am not sure if leaving extra space in the struct is a recommended
practice for ioctls or not - I thought I noticed some code that did and
some that didn't in the kernel so I err'd on the side of leaving the space
and probably did it in the worst way possible.
Thanks,
Joe
On Thu, Jan 25, 2024 at 04:23:58PM -0800, Greg Kroah-Hartman wrote:
> On Thu, Jan 25, 2024 at 04:11:28PM -0800, Joe Damato wrote:
> > On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
> > > On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > > > +struct epoll_params {
> > > > + u64 busy_poll_usecs;
> > > > + u16 busy_poll_budget;
> > > > +
> > > > + /* for future fields */
> > > > + u8 data[118];
> > > > +} EPOLL_PACKED;
> > >
> > > > variables that cross the user/kernel boundary need to be __u64, __u16,
> > > and __u8 here.
> >
> > I'll make that change for the next version, thank you.
> >
> > > And why 118?
> >
> > I chose this arbitrarily. I figured that a 128 byte struct would support 16
> > u64s in the event that other fields needed to be added in the future. 118
> > is what was left after the existing fields. There's almost certainly a
> > better way to do this - or perhaps it is unnecessary as per your other
> > message.
> >
> > I am not sure if leaving extra space in the struct is a recommended
> > practice for ioctls or not - I thought I noticed some code that did and
> > some that didn't in the kernel so I err'd on the side of leaving the space
> > and probably did it in the worst way possible.
>
> It's not really a good idea unless you know exactly what you are going
> to do with it. Why not just have a new ioctl if you need new
> information in the future? That's simpler, right?
Sure, that makes sense to me. I'll remove it in the v4 alongside the other
changes you've requested.
Thanks for your time and patience reviewing my code. I greatly appreciate
your helpful comments and feedback.
Thanks,
Joe
On Thu, Jan 25, 2024 at 06:36:30PM -0800, Joe Damato wrote:
> On Thu, Jan 25, 2024 at 04:23:58PM -0800, Greg Kroah-Hartman wrote:
> > On Thu, Jan 25, 2024 at 04:11:28PM -0800, Joe Damato wrote:
> > > On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
> > > > On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > > > > +struct epoll_params {
> > > > > + u64 busy_poll_usecs;
> > > > > + u16 busy_poll_budget;
> > > > > +
> > > > > + /* for future fields */
> > > > > + u8 data[118];
> > > > > +} EPOLL_PACKED;
> > > >
> > > > > variables that cross the user/kernel boundary need to be __u64, __u16,
> > > > and __u8 here.
> > >
> > > I'll make that change for the next version, thank you.
> > >
> > > > And why 118?
> > >
> > > I chose this arbitrarily. I figured that a 128 byte struct would support 16
> > > u64s in the event that other fields needed to be added in the future. 118
> > > is what was left after the existing fields. There's almost certainly a
> > > better way to do this - or perhaps it is unnecessary as per your other
> > > message.
> > >
> > > I am not sure if leaving extra space in the struct is a recommended
> > > practice for ioctls or not - I thought I noticed some code that did and
> > > some that didn't in the kernel so I err'd on the side of leaving the space
> > > and probably did it in the worst way possible.
> >
> > It's not really a good idea unless you know exactly what you are going
> > to do with it. Why not just have a new ioctl if you need new
> > information in the future? That's simpler, right?
>
> Sure, that makes sense to me. I'll remove it in the v4 alongside the other
> changes you've requested.
Fwiw, we do support extensible ioctls since they encode the size. Take a
look at kernel/seccomp.c. It's a clean extensible interface built on top
of the copy_struct_from_user() pattern we added for system calls
(openat(), clone3() etc.):
static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct seccomp_filter *filter = file->private_data;
void __user *buf = (void __user *)arg;
/* Fixed-size ioctls */
switch (cmd) {
case SECCOMP_IOCTL_NOTIF_RECV:
return seccomp_notify_recv(filter, buf);
case SECCOMP_IOCTL_NOTIF_SEND:
return seccomp_notify_send(filter, buf);
case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
return seccomp_notify_set_flags(filter, arg);
}
/* Extensible Argument ioctls */
#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
switch (EA_IOCTL(cmd)) {
case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
default:
return -EINVAL;
}
}
static long seccomp_notify_addfd(struct seccomp_filter *filter,
struct seccomp_notif_addfd __user *uaddfd,
unsigned int size)
{
struct seccomp_notif_addfd addfd;
struct seccomp_knotif *knotif;
struct seccomp_kaddfd kaddfd;
int ret;
BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
return -EINVAL;
ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
if (ret)
return ret;
On Fri, Jan 26, 2024 at 11:07:36AM +0100, Christian Brauner wrote:
> On Thu, Jan 25, 2024 at 06:36:30PM -0800, Joe Damato wrote:
> > On Thu, Jan 25, 2024 at 04:23:58PM -0800, Greg Kroah-Hartman wrote:
> > > On Thu, Jan 25, 2024 at 04:11:28PM -0800, Joe Damato wrote:
> > > > On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
> > > > > On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> > > > > > +struct epoll_params {
> > > > > > + u64 busy_poll_usecs;
> > > > > > + u16 busy_poll_budget;
> > > > > > +
> > > > > > + /* for future fields */
> > > > > > + u8 data[118];
> > > > > > +} EPOLL_PACKED;
> > > > >
> > > > > > variables that cross the user/kernel boundary need to be __u64, __u16,
> > > > > and __u8 here.
> > > >
> > > > I'll make that change for the next version, thank you.
> > > >
> > > > > And why 118?
> > > >
> > > > I chose this arbitrarily. I figured that a 128 byte struct would support 16
> > > > u64s in the event that other fields needed to be added in the future. 118
> > > > is what was left after the existing fields. There's almost certainly a
> > > > better way to do this - or perhaps it is unnecessary as per your other
> > > > message.
> > > >
> > > > I am not sure if leaving extra space in the struct is a recommended
> > > > practice for ioctls or not - I thought I noticed some code that did and
> > > > some that didn't in the kernel so I err'd on the side of leaving the space
> > > > and probably did it in the worst way possible.
> > >
> > > It's not really a good idea unless you know exactly what you are going
> > > to do with it. Why not just have a new ioctl if you need new
> > > information in the future? That's simpler, right?
> >
> > Sure, that makes sense to me. I'll remove it in the v4 alongside the other
> > changes you've requested.
>
> Fwiw, we do support extensible ioctls since they encode the size. Take a
> look at kernel/seccomp.c. It's a clean extensible interface built on top
> of the copy_struct_from_user() pattern we added for system calls
> (openat(), clone3() etc.):
>
> static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg)
> {
> struct seccomp_filter *filter = file->private_data;
> void __user *buf = (void __user *)arg;
>
> /* Fixed-size ioctls */
> switch (cmd) {
> case SECCOMP_IOCTL_NOTIF_RECV:
> return seccomp_notify_recv(filter, buf);
> case SECCOMP_IOCTL_NOTIF_SEND:
> return seccomp_notify_send(filter, buf);
> case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
> case SECCOMP_IOCTL_NOTIF_ID_VALID:
> return seccomp_notify_id_valid(filter, buf);
> case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
> return seccomp_notify_set_flags(filter, arg);
> }
>
> /* Extensible Argument ioctls */
> #define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
> switch (EA_IOCTL(cmd)) {
> case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
> return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
> default:
> return -EINVAL;
> }
> }
>
> static long seccomp_notify_addfd(struct seccomp_filter *filter,
> struct seccomp_notif_addfd __user *uaddfd,
> unsigned int size)
> {
> struct seccomp_notif_addfd addfd;
> struct seccomp_knotif *knotif;
> struct seccomp_kaddfd kaddfd;
> int ret;
>
> BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
> BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
>
> if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
> return -EINVAL;
>
> ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
> if (ret)
> return ret;
Thanks; that's a really helpful note and example.
I'm inclined to believe that new fields probably won't be needed for a
while, but if they are: an extensible ioctl could be added in the future
to deal with that problem at that point.
Thanks,
Joe
On Fri, Jan 26, 2024, at 03:36, Joe Damato wrote:
> On Thu, Jan 25, 2024 at 04:23:58PM -0800, Greg Kroah-Hartman wrote:
>> On Thu, Jan 25, 2024 at 04:11:28PM -0800, Joe Damato wrote:
>> > On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
>> > > On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
>> > > > +struct epoll_params {
>> > > > + u64 busy_poll_usecs;
>> > > > + u16 busy_poll_budget;
>> > > > +
>> > > > + /* for future fields */
>> > > > + u8 data[118];
>> > > > +} EPOLL_PACKED;
>> > >
>
> Sure, that makes sense to me. I'll remove it in the v4 alongside the other
> changes you've requested.
>
> Thanks for your time and patience reviewing my code. I greatly appreciate
> your helpful comments and feedback.
Note that you should still pad the structure to its normal
alignment. On non-x86 targets this would currently mean a
multiple of 64 bits.
I would suggest dropping the EPOLL_PACKED here entirely and
just using a fully aligned structure on all architectures, like
struct epoll_params {
__aligned_u64 busy_poll_usecs;
__u16 busy_poll_budget;
__u8 __pad[6];
};
The explicit padding can help avoid leaking stack data when
a structure is copied back from kernel to userspace, so I would
just always use it in ioctl data structures.
Arnd
From: Arnd Bergmann
> Sent: 26 January 2024 06:16
>
> On Fri, Jan 26, 2024, at 03:36, Joe Damato wrote:
> > On Thu, Jan 25, 2024 at 04:23:58PM -0800, Greg Kroah-Hartman wrote:
> >> On Thu, Jan 25, 2024 at 04:11:28PM -0800, Joe Damato wrote:
> >> > On Thu, Jan 25, 2024 at 03:21:46PM -0800, Greg Kroah-Hartman wrote:
> >> > > On Thu, Jan 25, 2024 at 10:56:59PM +0000, Joe Damato wrote:
> >> > > > +struct epoll_params {
> >> > > > + u64 busy_poll_usecs;
> >> > > > + u16 busy_poll_budget;
> >> > > > +
> >> > > > + /* for future fields */
> >> > > > + u8 data[118];
> >> > > > +} EPOLL_PACKED;
> >> > >
> >
> > Sure, that makes sense to me. I'll remove it in the v4 alongside the other
> > changes you've requested.
> >
> > Thanks for your time and patience reviewing my code. I greatly appreciate
> > your helpful comments and feedback.
>
> Note that you should still pad the structure to its normal
> alignment. On non-x86 targets this would currently mean a
> multiple of 64 bits.
>
> I would suggest dropping the EPOLL_PACKED here entirely and
> just using a fully aligned structure on all architectures, like
>
> struct epoll_params {
> __aligned_u64 busy_poll_usecs;
> __u16 busy_poll_budget;
> __u8 __pad[6];
> };
>
> The explicit padding can help avoid leaking stack data when
> a structure is copied back from kernel to userspace, so I would
> just always use it in ioctl data structures.
Or just use 32bit types for both fields.
Both values need to be rejected with an error long before they get that large.
32 bits of usec is about 4295 seconds (roughly 71 minutes).
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Hi Joe,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Joe-Damato/eventpoll-support-busy-poll-per-epoll-instance/20240126-070250
base: net-next/main
patch link: https://lore.kernel.org/r/20240125225704.12781-4-jdamato%40fastly.com
patch subject: [PATCH net-next v3 3/3] eventpoll: Add epoll ioctl for epoll_params
config: i386-buildonly-randconfig-002-20240127 (https://download.01.org/0day-ci/archive/20240128/[email protected]/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240128/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
In file included from <command-line>:
>> ./usr/include/linux/eventpoll.h:89:9: error: unknown type name 'u64'
89 | u64 busy_poll_usecs;
| ^~~
>> ./usr/include/linux/eventpoll.h:90:9: error: unknown type name 'u16'
90 | u16 busy_poll_budget;
| ^~~
>> ./usr/include/linux/eventpoll.h:93:9: error: unknown type name 'u8'
93 | u8 data[118];
| ^~
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki