In vfio_spapr_iommu_eeh_ioctl(), if the ioctl command is VFIO_EEH_PE_OP,
the user-space buffer 'arg' is copied to the kernel object 'op' and the
'argsz' and 'flags' fields of 'op' are checked. If the check fails, an
error code EINVAL is returned. Otherwise, 'op.op' is further checked
through a switch statement to invoke related handlers. If 'op.op' is
VFIO_EEH_PE_INJECT_ERR, the whole user-space buffer 'arg' is copied again
to 'op' to obtain the err information. However, in the following execution
of this case, the fields of 'op', except the field 'err', are actually not
used. That is, the second copy has a redundant part. Therefore, for
performance reasons, the redundant part of the second copy should be
removed.
This patch removes such a part in the second copy. It only copies from
'err.type' to 'err.mask', which is exactly required by the
VFIO_EEH_PE_INJECT_ERR op.
Signed-off-by: Wenwen Wang <[email protected]>
---
drivers/vfio/vfio_spapr_eeh.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 38edeb4..66634c6 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -37,6 +37,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
struct eeh_pe *pe;
struct vfio_eeh_pe_op op;
unsigned long minsz;
+ unsigned long start, end;
long ret = -EINVAL;
switch (cmd) {
@@ -86,10 +87,12 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
ret = eeh_pe_configure(pe);
break;
case VFIO_EEH_PE_INJECT_ERR:
- minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
- if (op.argsz < minsz)
+ start = offsetof(struct vfio_eeh_pe_op, err.type);
+ end = offsetofend(struct vfio_eeh_pe_op, err.mask);
+ if (op.argsz < end)
return -EINVAL;
- if (copy_from_user(&op, (void __user *)arg, minsz))
+ if (copy_from_user(&op.err, (char __user *)arg +
+ start, end - start))
return -EFAULT;
ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
--
2.7.4
On Mon, 8 Oct 2018 13:06:20 -0500
Wenwen Wang <[email protected]> wrote:
> In vfio_spapr_iommu_eeh_ioctl(), if the ioctl command is VFIO_EEH_PE_OP,
> the user-space buffer 'arg' is copied to the kernel object 'op' and the
> 'argsz' and 'flags' fields of 'op' are checked. If the check fails, an
> error code EINVAL is returned. Otherwise, 'op.op' is further checked
> through a switch statement to invoke related handlers. If 'op.op' is
> VFIO_EEH_PE_INJECT_ERR, the whole user-space buffer 'arg' is copied again
> to 'op' to obtain the err information. However, in the following execution
> of this case, the fields of 'op', except the field 'err', are actually not
> used. That is, the second copy has a redundant part. Therefore, for both
> performance consideration, the redundant part of the second copy should be
> removed.
>
> This patch removes such a part in the second copy. It only copies from
> 'err.type' to 'err.mask', which is exactly required by the
> VFIO_EEH_PE_INJECT_ERR op.
>
> Signed-off-by: Wenwen Wang <[email protected]>
> ---
> drivers/vfio/vfio_spapr_eeh.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
> index 38edeb4..66634c6 100644
> --- a/drivers/vfio/vfio_spapr_eeh.c
> +++ b/drivers/vfio/vfio_spapr_eeh.c
> @@ -37,6 +37,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> struct eeh_pe *pe;
> struct vfio_eeh_pe_op op;
> unsigned long minsz;
> + unsigned long start, end;
> long ret = -EINVAL;
>
> switch (cmd) {
> @@ -86,10 +87,12 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> ret = eeh_pe_configure(pe);
> break;
> case VFIO_EEH_PE_INJECT_ERR:
> - minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
> - if (op.argsz < minsz)
> + start = offsetof(struct vfio_eeh_pe_op, err.type);
We already have this in minsz: offsetofend(,op) == offsetof(,err.type).
That can't change without breaking userspace.
> + end = offsetofend(struct vfio_eeh_pe_op, err.mask);
> + if (op.argsz < end)
> return -EINVAL;
> - if (copy_from_user(&op, (void __user *)arg, minsz))
> + if (copy_from_user(&op.err, (char __user *)arg +
> + start, end - start))
So we trade 12 bytes of redundant copy for an extra stack variable and
an arithmetic operation, not necessarily an obvious win, but more
correct I guess.
Alexey, I also notice that these 12 bytes mean that the u64 fields in
struct vfio_eeh_pe_err are not 8-byte aligned, which could lead to
compiler-dependent packing interpretation issues with userspace.
Should there be a 4-byte reserved field in there to make it explicit
(so long as it matches the current interpretation)? Thanks,
Alex
On Mon, Oct 8, 2018 at 1:47 PM Alex Williamson
<[email protected]> wrote:
>
> On Mon, 8 Oct 2018 13:06:20 -0500
> Wenwen Wang <[email protected]> wrote:
>
> > In vfio_spapr_iommu_eeh_ioctl(), if the ioctl command is VFIO_EEH_PE_OP,
> > the user-space buffer 'arg' is copied to the kernel object 'op' and the
> > 'argsz' and 'flags' fields of 'op' are checked. If the check fails, an
> > error code EINVAL is returned. Otherwise, 'op.op' is further checked
> > through a switch statement to invoke related handlers. If 'op.op' is
> > VFIO_EEH_PE_INJECT_ERR, the whole user-space buffer 'arg' is copied again
> > to 'op' to obtain the err information. However, in the following execution
> > of this case, the fields of 'op', except the field 'err', are actually not
> > used. That is, the second copy has a redundant part. Therefore, for both
> > performance consideration, the redundant part of the second copy should be
> > removed.
> >
> > This patch removes such a part in the second copy. It only copies from
> > 'err.type' to 'err.mask', which is exactly required by the
> > VFIO_EEH_PE_INJECT_ERR op.
> >
> > Signed-off-by: Wenwen Wang <[email protected]>
> > ---
> > drivers/vfio/vfio_spapr_eeh.c | 9 ++++++---
> > 1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
> > index 38edeb4..66634c6 100644
> > --- a/drivers/vfio/vfio_spapr_eeh.c
> > +++ b/drivers/vfio/vfio_spapr_eeh.c
> > @@ -37,6 +37,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> > struct eeh_pe *pe;
> > struct vfio_eeh_pe_op op;
> > unsigned long minsz;
> > + unsigned long start, end;
> > long ret = -EINVAL;
> >
> > switch (cmd) {
> > @@ -86,10 +87,12 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> > ret = eeh_pe_configure(pe);
> > break;
> > case VFIO_EEH_PE_INJECT_ERR:
> > - minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
> > - if (op.argsz < minsz)
> > + start = offsetof(struct vfio_eeh_pe_op, err.type);
>
> We already have this in minsz, offsetofend(,op) == offsetof(,err.type).
> That can't change without breaking userspace.
>
> > + end = offsetofend(struct vfio_eeh_pe_op, err.mask);
> > + if (op.argsz < end)
> > return -EINVAL;
> > - if (copy_from_user(&op, (void __user *)arg, minsz))
> > + if (copy_from_user(&op.err, (char __user *)arg +
> > + start, end - start))
>
> So we trade 12 bytes of redundant copy for an extra stack variable and
> an arithmetic operation, not necessarily an obvious win, but more
> correct I guess.
>
> Alexey, I also notice that these 12 bytes means that the u64 fields in
> struct vfio_eeh_pe_err are not 8-byte aligned which could lead to
> compiler dependent packing interpretation issues with userspace.
> Should there be a 4-byte reserved field in there to make it explicit
> (so long as it matches the current interpretation)? Thanks,
It sounds reasonable. I can add such a field in struct vfio_eeh_pe_op. Thanks!
Wenwen