2022-05-14 01:31:38

by Matthew Rosato

[permalink] [raw]
Subject: [PATCH v7 20/22] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices

The KVM_S390_ZPCI_OP ioctl provides a mechanism for managing
hardware-assisted virtualization features for s390X zPCI passthrough.
Add the first 2 operations, which can be used to enable/disable
the specified device for Adapter Event Notification interpretation.

Signed-off-by: Matthew Rosato <[email protected]>
---
Documentation/virt/kvm/api.rst | 45 +++++++++++++++++++
arch/s390/kvm/kvm-s390.c | 23 ++++++++++
arch/s390/kvm/pci.c | 81 ++++++++++++++++++++++++++++++++++
arch/s390/kvm/pci.h | 2 +
include/uapi/linux/kvm.h | 31 +++++++++++++
5 files changed, 182 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 4a900cdbc62e..a7cd5ebce031 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5645,6 +5645,51 @@ enabled with ``arch_prctl()``, but this may change in the future.
The offsets of the state save areas in struct kvm_xsave follow the contents
of CPUID leaf 0xD on the host.

+4.135 KVM_S390_ZPCI_OP
+--------------------
+
+:Capability: KVM_CAP_S390_ZPCI_OP
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: struct kvm_s390_zpci_op (in)
+:Returns: 0 on success, <0 on error
+
+Used to manage hardware-assisted virtualization features for zPCI devices.
+
+Parameters are specified via the following structure::
+
+ struct kvm_s390_zpci_op {
+ /* in */
+ __u32 fh; /* target device */
+ __u8 op; /* operation to perform */
+ __u8 pad[3];
+ union {
+ /* for KVM_S390_ZPCIOP_REG_AEN */
+ struct {
+ __u64 ibv; /* Guest addr of interrupt bit vector */
+ __u64 sb; /* Guest addr of summary bit */
+ __u32 flags;
+ __u32 noi; /* Number of interrupts */
+ __u8 isc; /* Guest interrupt subclass */
+ __u8 sbo; /* Offset of guest summary bit vector */
+ __u16 pad;
+ } reg_aen;
+ __u64 reserved[8];
+ } u;
+ };
+
+The type of operation is specified in the "op" field.
+KVM_S390_ZPCIOP_REG_AEN is used to register the VM for adapter event
+notification interpretation, which will allow firmware delivery of adapter
+events directly to the vm, with KVM providing a backup delivery mechanism;
+KVM_S390_ZPCIOP_DEREG_AEN is used to subsequently disable interpretation of
+adapter event notifications.
+
+The target zPCI function must also be specified via the "fh" field. For the
+KVM_S390_ZPCIOP_REG_AEN operation, additional information to establish firmware
+delivery must be provided via the "reg_aen" struct.
+
+The "reserved" field is meant for future extensions.

5. The kvm_run structure
========================
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b95b25490018..1af7cea9d579 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -618,6 +618,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_PROTECTED:
r = is_prot_virt_host();
break;
+ case KVM_CAP_S390_ZPCI_OP:
+ if (kvm_s390_pci_interp_allowed())
+ r = 1;
+ else
+ r = 0;
+ break;
default:
r = 0;
}
@@ -2633,6 +2639,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_S390_ZPCI_OP: {
+ struct kvm_s390_zpci_op args;
+
+ r = -EINVAL;
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ break;
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ r = kvm_s390_pci_zpci_op(kvm, &args);
+ if (r)
+ break;
+ if (copy_to_user(argp, &args, sizeof(args)))
+ r = -EFAULT;
+ break;
+ }
default:
r = -ENOTTY;
}
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 1393a1604494..6e6254016be4 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -585,6 +585,87 @@ void kvm_s390_pci_clear_list(struct kvm *kvm)
spin_unlock(&kvm->arch.kzdev_list_lock);
}

+static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
+{
+ struct zpci_dev *zdev = NULL;
+ struct kvm_zdev *kzdev;
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
+ if (kzdev->zdev->fh == fh) {
+ zdev = kzdev->zdev;
+ break;
+ }
+ }
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ return zdev;
+}
+
+static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
+ struct kvm_s390_zpci_op *args)
+{
+ struct zpci_fib fib = {};
+
+ fib.fmt0.aibv = args->u.reg_aen.ibv;
+ fib.fmt0.isc = args->u.reg_aen.isc;
+ fib.fmt0.noi = args->u.reg_aen.noi;
+ if (args->u.reg_aen.sb != 0) {
+ fib.fmt0.aisb = args->u.reg_aen.sb;
+ fib.fmt0.aisbo = args->u.reg_aen.sbo;
+ fib.fmt0.sum = 1;
+ } else {
+ fib.fmt0.aisb = 0;
+ fib.fmt0.aisbo = 0;
+ fib.fmt0.sum = 0;
+ }
+
+ if (args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST)
+ return kvm_s390_pci_aif_enable(zdev, &fib, true);
+ else
+ return kvm_s390_pci_aif_enable(zdev, &fib, false);
+}
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
+{
+ struct kvm_zdev *kzdev;
+ struct zpci_dev *zdev;
+ int r;
+
+ zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
+ if (!zdev)
+ return -ENODEV;
+
+ mutex_lock(&zdev->kzdev_lock);
+ mutex_lock(&kvm->lock);
+
+ kzdev = zdev->kzdev;
+ if (!kzdev) {
+ r = -ENODEV;
+ goto out;
+ }
+ if (kzdev->kvm != kvm) {
+ r = -EPERM;
+ goto out;
+ }
+
+ switch (args->op) {
+ case KVM_S390_ZPCIOP_REG_AEN:
+ r = kvm_s390_pci_zpci_reg_aen(zdev, args);
+ break;
+ case KVM_S390_ZPCIOP_DEREG_AEN:
+ r = kvm_s390_pci_aif_disable(zdev, false);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ return r;
+}
+
int kvm_s390_pci_init(void)
{
aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
index fb2b91b76e0c..0351382e990f 100644
--- a/arch/s390/kvm/pci.h
+++ b/arch/s390/kvm/pci.h
@@ -59,6 +59,8 @@ void kvm_s390_pci_aen_exit(void);
void kvm_s390_pci_init_list(struct kvm *kvm);
void kvm_s390_pci_clear_list(struct kvm *kvm);

+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
+
int kvm_s390_pci_init(void);
void kvm_s390_pci_exit(void);

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6a184d260c7f..1d3d41523d10 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1152,6 +1152,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DISABLE_QUIRKS2 213
/* #define KVM_CAP_VM_TSC_CONTROL 214 */
#define KVM_CAP_SYSTEM_EVENT_DATA 215
+#define KVM_CAP_S390_ZPCI_OP 216

#ifdef KVM_CAP_IRQ_ROUTING

@@ -2068,4 +2069,34 @@ struct kvm_stats_desc {
/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)

+/* Available with KVM_CAP_S390_ZPCI_OP */
+#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd0, struct kvm_s390_zpci_op)
+
+struct kvm_s390_zpci_op {
+ /* in */
+ __u32 fh; /* target device */
+ __u8 op; /* operation to perform */
+ __u8 pad[3];
+ union {
+ /* for KVM_S390_ZPCIOP_REG_AEN */
+ struct {
+ __u64 ibv; /* Guest addr of interrupt bit vector */
+ __u64 sb; /* Guest addr of summary bit */
+ __u32 flags;
+ __u32 noi; /* Number of interrupts */
+ __u8 isc; /* Guest interrupt subclass */
+ __u8 sbo; /* Offset of guest summary bit vector */
+ __u16 pad;
+ } reg_aen;
+ __u64 reserved[8];
+ } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN 0
+#define KVM_S390_ZPCIOP_DEREG_AEN 1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
+
#endif /* __LINUX_KVM_H */
--
2.27.0



2022-05-16 17:55:22

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH v7 20/22] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices

On 5/16/22 5:52 AM, Thomas Huth wrote:
> On 13/05/2022 21.15, Matthew Rosato wrote:
>> The KVM_S390_ZPCI_OP ioctl provides a mechanism for managing
>> hardware-assisted virtualization features for s390X zPCI passthrough.
>
> s/s390X/s390x/
>
>> Add the first 2 operations, which can be used to enable/disable
>> the specified device for Adapter Event Notification interpretation.
>>
>> Signed-off-by: Matthew Rosato <[email protected]>
>> ---
>>   Documentation/virt/kvm/api.rst | 45 +++++++++++++++++++
>>   arch/s390/kvm/kvm-s390.c       | 23 ++++++++++
>>   arch/s390/kvm/pci.c            | 81 ++++++++++++++++++++++++++++++++++
>>   arch/s390/kvm/pci.h            |  2 +
>>   include/uapi/linux/kvm.h       | 31 +++++++++++++
>>   5 files changed, 182 insertions(+)
>>
>> diff --git a/Documentation/virt/kvm/api.rst
>> b/Documentation/virt/kvm/api.rst
>> index 4a900cdbc62e..a7cd5ebce031 100644
>> --- a/Documentation/virt/kvm/api.rst
>> +++ b/Documentation/virt/kvm/api.rst
>> @@ -5645,6 +5645,51 @@ enabled with ``arch_prctl()``, but this may
>> change in the future.
>>   The offsets of the state save areas in struct kvm_xsave follow the
>> contents
>>   of CPUID leaf 0xD on the host.
>> +4.135 KVM_S390_ZPCI_OP
>> +--------------------
>> +
>> +:Capability: KVM_CAP_S390_ZPCI_OP
>> +:Architectures: s390
>> +:Type: vcpu ioctl
>
> vcpu? ... you're wiring it up in  kvm_arch_vm_ioctl() later, so I assume
> it's rather a VM ioctl?

Yup, VM ioctl, bad copy/paste job...

>
>> +:Parameters: struct kvm_s390_zpci_op (in)
>> +:Returns: 0 on success, <0 on error
>> +
>> +Used to manage hardware-assisted virtualization features for zPCI
>> devices.
>> +
>> +Parameters are specified via the following structure::
>> +
>> +  struct kvm_s390_zpci_op {
>> +    /* in */
>
> If all is "in", why is there a copy_to_user() in the code later?
>

Oh no, this is a leftover from a prior version... Good catch. There
should no longer be a copy_to_user.

>> +    __u32 fh;        /* target device */
>> +    __u8  op;        /* operation to perform */
>> +    __u8  pad[3];
>> +    union {
>> +        /* for KVM_S390_ZPCIOP_REG_AEN */
>> +        struct {
>> +            __u64 ibv;    /* Guest addr of interrupt bit vector */
>> +            __u64 sb;    /* Guest addr of summary bit */
>
> If this is really a vcpu ioctl, what kind of addresses are you talking
> about here? virtual addresses? real addresses? absolute addresses?

It's a VM ioctl. These are guest kernel physical addresses that are
later pinned in arch/s390/kvm/pci.c:kvm_s390_pci_aif_enable() as part of
handling the ioctl.

>
>> +            __u32 flags;
>> +            __u32 noi;    /* Number of interrupts */
>> +            __u8 isc;    /* Guest interrupt subclass */
>> +            __u8 sbo;    /* Offset of guest summary bit vector */
>> +            __u16 pad;
>> +        } reg_aen;
>> +        __u64 reserved[8];
>> +    } u;
>> +  };
>> +
>> +The type of operation is specified in the "op" field.
>> +KVM_S390_ZPCIOP_REG_AEN is used to register the VM for adapter event
>> +notification interpretation, which will allow firmware delivery of
>> adapter
>> +events directly to the vm, with KVM providing a backup delivery
>> mechanism;
>> +KVM_S390_ZPCIOP_DEREG_AEN is used to subsequently disable
>> interpretation of
>> +adapter event notifications.
>> +
>> +The target zPCI function must also be specified via the "fh" field.
>> For the
>> +KVM_S390_ZPCIOP_REG_AEN operation, additional information to
>> establish firmware
>> +delivery must be provided via the "reg_aen" struct.
>> +
>> +The "reserved" field is meant for future extensions.
>
> Maybe also mention the "pad" fields? And add should these also be
> initialized to 0 by the calling userspace program?

Sure, I can mention them. And yes, I agree that userspace should
initialize them to 0, I'll update the QEMU series accordingly.

>
>>   5. The kvm_run structure
>>   ========================
>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>> index b95b25490018..1af7cea9d579 100644
>> --- a/arch/s390/kvm/kvm-s390.c
>> +++ b/arch/s390/kvm/kvm-s390.c
>> @@ -618,6 +618,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm,
>> long ext)
>>       case KVM_CAP_S390_PROTECTED:
>>           r = is_prot_virt_host();
>>           break;
>> +    case KVM_CAP_S390_ZPCI_OP:
>> +        if (kvm_s390_pci_interp_allowed())
>> +            r = 1;
>> +        else
>> +            r = 0;
>
> Could be shortened to:
>
>         r = kvm_s390_pci_interp_allowed();
>
>> +        break;
>>       default:
>>           r = 0;
>>       }
>> @@ -2633,6 +2639,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>               r = -EFAULT;
>>           break;
>>       }
>> +    case KVM_S390_ZPCI_OP: {
>> +        struct kvm_s390_zpci_op args;
>> +
>> +        r = -EINVAL;
>> +        if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
>> +            break;
>> +        if (copy_from_user(&args, argp, sizeof(args))) {
>> +            r = -EFAULT;
>> +            break;
>> +        }
>> +        r = kvm_s390_pci_zpci_op(kvm, &args);
>> +        if (r)
>> +            break;
>> +        if (copy_to_user(argp, &args, sizeof(args)))
>> +            r = -EFAULT;
>
> So this copy_to_user() indicates that information is returned to
> userspace ... but below, the ioctl is declared with _IOW only ... this
> does not match. Should it be declared with _IOWR or should the
> copy_to_user() be dropped?

copy_to_user should be dropped. Thanks!

>
>> +        break;
>> +    }
>>       default:
>>           r = -ENOTTY;
>>       }
>> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
>> index 1393a1604494..6e6254016be4 100644
>> --- a/arch/s390/kvm/pci.c
>> +++ b/arch/s390/kvm/pci.c
>> @@ -585,6 +585,87 @@ void kvm_s390_pci_clear_list(struct kvm *kvm)
>>       spin_unlock(&kvm->arch.kzdev_list_lock);
>>   }
>> +static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
>> +{
>> +    struct zpci_dev *zdev = NULL;
>> +    struct kvm_zdev *kzdev;
>> +
>> +    spin_lock(&kvm->arch.kzdev_list_lock);
>> +    list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
>> +        if (kzdev->zdev->fh == fh) {
>> +            zdev = kzdev->zdev;
>> +            break;
>> +        }
>> +    }
>> +    spin_unlock(&kvm->arch.kzdev_list_lock);
>> +
>> +    return zdev;
>> +}
>> +
>> +static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
>> +                     struct kvm_s390_zpci_op *args)
>> +{
>> +    struct zpci_fib fib = {};
>> +
>> +    fib.fmt0.aibv = args->u.reg_aen.ibv;
>> +    fib.fmt0.isc = args->u.reg_aen.isc;
>> +    fib.fmt0.noi = args->u.reg_aen.noi;
>> +    if (args->u.reg_aen.sb != 0) {
>> +        fib.fmt0.aisb = args->u.reg_aen.sb;
>> +        fib.fmt0.aisbo = args->u.reg_aen.sbo;
>> +        fib.fmt0.sum = 1;
>> +    } else {
>> +        fib.fmt0.aisb = 0;
>> +        fib.fmt0.aisbo = 0;
>> +        fib.fmt0.sum = 0;
>> +    }
>> +
>> +    if (args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST)
>> +        return kvm_s390_pci_aif_enable(zdev, &fib, true);
>> +    else
>> +        return kvm_s390_pci_aif_enable(zdev, &fib, false);
>
> Alternatively (just a matter of taste):
>
>     bool hostflag;
>     ...
>     hostflag = (args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
>     return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
>
>> +}
>> +
>> +int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
>> +{
>> +    struct kvm_zdev *kzdev;
>> +    struct zpci_dev *zdev;
>> +    int r;
>> +
>> +    zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
>> +    if (!zdev)
>> +        return -ENODEV;
>> +
>> +    mutex_lock(&zdev->kzdev_lock);
>> +    mutex_lock(&kvm->lock);
>> +
>> +    kzdev = zdev->kzdev;
>> +    if (!kzdev) {
>> +        r = -ENODEV;
>> +        goto out;
>> +    }
>> +    if (kzdev->kvm != kvm) {
>> +        r = -EPERM;
>> +        goto out;
>> +    }
>> +
>> +    switch (args->op) {
>> +    case KVM_S390_ZPCIOP_REG_AEN:
>
> Please also check here that args->u.reg_aen.flags does not have any bits
> set that we don't support here (otherwise, this could cause some trouble
> when introducing additional flags later).

Good idea, will do

>
>> +        r = kvm_s390_pci_zpci_reg_aen(zdev, args);
>> +        break;
>> +    case KVM_S390_ZPCIOP_DEREG_AEN:
>> +        r = kvm_s390_pci_aif_disable(zdev, false);
>> +        break;
>> +    default:
>> +        r = -EINVAL;
>> +    }
>> +
>> +out:
>> +    mutex_unlock(&kvm->lock);
>> +    mutex_unlock(&zdev->kzdev_lock);
>> +    return r;
>> +}
>> +
>>   int kvm_s390_pci_init(void)
>>   {
>>       aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
>> diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
>> index fb2b91b76e0c..0351382e990f 100644
>> --- a/arch/s390/kvm/pci.h
>> +++ b/arch/s390/kvm/pci.h
>> @@ -59,6 +59,8 @@ void kvm_s390_pci_aen_exit(void);
>>   void kvm_s390_pci_init_list(struct kvm *kvm);
>>   void kvm_s390_pci_clear_list(struct kvm *kvm);
>> +int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op
>> *args);
>> +
>>   int kvm_s390_pci_init(void);
>>   void kvm_s390_pci_exit(void);
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 6a184d260c7f..1d3d41523d10 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -1152,6 +1152,7 @@ struct kvm_ppc_resize_hpt {
>>   #define KVM_CAP_DISABLE_QUIRKS2 213
>>   /* #define KVM_CAP_VM_TSC_CONTROL 214 */
>>   #define KVM_CAP_SYSTEM_EVENT_DATA 215
>> +#define KVM_CAP_S390_ZPCI_OP 216
>>   #ifdef KVM_CAP_IRQ_ROUTING
>> @@ -2068,4 +2069,34 @@ struct kvm_stats_desc {
>>   /* Available with KVM_CAP_XSAVE2 */
>>   #define KVM_GET_XSAVE2          _IOR(KVMIO,  0xcf, struct kvm_xsave)
>> +/* Available with KVM_CAP_S390_ZPCI_OP */
>> +#define KVM_S390_ZPCI_OP      _IOW(KVMIO,  0xd0, struct
>> kvm_s390_zpci_op)
>
> Please double-check whether this should be _IOWR instead (see above). >

As mentioned above, the copy_to_user() should be removed.

>> +struct kvm_s390_zpci_op {
>> +    /* in */
>> +    __u32 fh;        /* target device */
>> +    __u8  op;        /* operation to perform */
>> +    __u8  pad[3];
>> +    union {
>> +        /* for KVM_S390_ZPCIOP_REG_AEN */
>> +        struct {
>> +            __u64 ibv;    /* Guest addr of interrupt bit vector */
>> +            __u64 sb;    /* Guest addr of summary bit */
>> +            __u32 flags;
>> +            __u32 noi;    /* Number of interrupts */
>> +            __u8 isc;    /* Guest interrupt subclass */
>> +            __u8 sbo;    /* Offset of guest summary bit vector */
>> +            __u16 pad;
>> +        } reg_aen;
>> +        __u64 reserved[8];
>> +    } u;
>> +};
>> +
>> +/* types for kvm_s390_zpci_op->op */
>> +#define KVM_S390_ZPCIOP_REG_AEN        0
>> +#define KVM_S390_ZPCIOP_DEREG_AEN    1
>> +
>> +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
>> +#define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
>> +
>>   #endif /* __LINUX_KVM_H */
>
>  Thomas
>


2022-05-16 20:23:25

by Thomas Huth

[permalink] [raw]
Subject: Re: [PATCH v7 20/22] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices

On 13/05/2022 21.15, Matthew Rosato wrote:
> The KVM_S390_ZPCI_OP ioctl provides a mechanism for managing
> hardware-assisted virtualization features for s390X zPCI passthrough.

s/s390X/s390x/

> Add the first 2 operations, which can be used to enable/disable
> the specified device for Adapter Event Notification interpretation.
>
> Signed-off-by: Matthew Rosato <[email protected]>
> ---
> Documentation/virt/kvm/api.rst | 45 +++++++++++++++++++
> arch/s390/kvm/kvm-s390.c | 23 ++++++++++
> arch/s390/kvm/pci.c | 81 ++++++++++++++++++++++++++++++++++
> arch/s390/kvm/pci.h | 2 +
> include/uapi/linux/kvm.h | 31 +++++++++++++
> 5 files changed, 182 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 4a900cdbc62e..a7cd5ebce031 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -5645,6 +5645,51 @@ enabled with ``arch_prctl()``, but this may change in the future.
> The offsets of the state save areas in struct kvm_xsave follow the contents
> of CPUID leaf 0xD on the host.
>
> +4.135 KVM_S390_ZPCI_OP
> +--------------------
> +
> +:Capability: KVM_CAP_S390_ZPCI_OP
> +:Architectures: s390
> +:Type: vcpu ioctl

vcpu? ... you're wiring it up in kvm_arch_vm_ioctl() later, so I assume
it's rather a VM ioctl?

> +:Parameters: struct kvm_s390_zpci_op (in)
> +:Returns: 0 on success, <0 on error
> +
> +Used to manage hardware-assisted virtualization features for zPCI devices.
> +
> +Parameters are specified via the following structure::
> +
> + struct kvm_s390_zpci_op {
> + /* in */

If all is "in", why is there a copy_to_user() in the code later?

> + __u32 fh; /* target device */
> + __u8 op; /* operation to perform */
> + __u8 pad[3];
> + union {
> + /* for KVM_S390_ZPCIOP_REG_AEN */
> + struct {
> + __u64 ibv; /* Guest addr of interrupt bit vector */
> + __u64 sb; /* Guest addr of summary bit */

If this is really a vcpu ioctl, what kind of addresses are you talking about
here? virtual addresses? real addresses? absolute addresses?

> + __u32 flags;
> + __u32 noi; /* Number of interrupts */
> + __u8 isc; /* Guest interrupt subclass */
> + __u8 sbo; /* Offset of guest summary bit vector */
> + __u16 pad;
> + } reg_aen;
> + __u64 reserved[8];
> + } u;
> + };
> +
> +The type of operation is specified in the "op" field.
> +KVM_S390_ZPCIOP_REG_AEN is used to register the VM for adapter event
> +notification interpretation, which will allow firmware delivery of adapter
> +events directly to the vm, with KVM providing a backup delivery mechanism;
> +KVM_S390_ZPCIOP_DEREG_AEN is used to subsequently disable interpretation of
> +adapter event notifications.
> +
> +The target zPCI function must also be specified via the "fh" field. For the
> +KVM_S390_ZPCIOP_REG_AEN operation, additional information to establish firmware
> +delivery must be provided via the "reg_aen" struct.
> +
> +The "reserved" field is meant for future extensions.

Maybe also mention the "pad" fields? And add should these also be
initialized to 0 by the calling userspace program?

> 5. The kvm_run structure
> ========================
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index b95b25490018..1af7cea9d579 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -618,6 +618,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_S390_PROTECTED:
> r = is_prot_virt_host();
> break;
> + case KVM_CAP_S390_ZPCI_OP:
> + if (kvm_s390_pci_interp_allowed())
> + r = 1;
> + else
> + r = 0;

Could be shortened to:

r = kvm_s390_pci_interp_allowed();

> + break;
> default:
> r = 0;
> }
> @@ -2633,6 +2639,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
> r = -EFAULT;
> break;
> }
> + case KVM_S390_ZPCI_OP: {
> + struct kvm_s390_zpci_op args;
> +
> + r = -EINVAL;
> + if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
> + break;
> + if (copy_from_user(&args, argp, sizeof(args))) {
> + r = -EFAULT;
> + break;
> + }
> + r = kvm_s390_pci_zpci_op(kvm, &args);
> + if (r)
> + break;
> + if (copy_to_user(argp, &args, sizeof(args)))
> + r = -EFAULT;

So this copy_to_user() indicates that information is returned to userspace
... but below, the ioctl is declared with _IOW only ... this does not match.
Should it be declared with _IOWR or should the copy_to_user() be dropped?

> + break;
> + }
> default:
> r = -ENOTTY;
> }
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index 1393a1604494..6e6254016be4 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -585,6 +585,87 @@ void kvm_s390_pci_clear_list(struct kvm *kvm)
> spin_unlock(&kvm->arch.kzdev_list_lock);
> }
>
> +static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
> +{
> + struct zpci_dev *zdev = NULL;
> + struct kvm_zdev *kzdev;
> +
> + spin_lock(&kvm->arch.kzdev_list_lock);
> + list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
> + if (kzdev->zdev->fh == fh) {
> + zdev = kzdev->zdev;
> + break;
> + }
> + }
> + spin_unlock(&kvm->arch.kzdev_list_lock);
> +
> + return zdev;
> +}
> +
> +static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
> + struct kvm_s390_zpci_op *args)
> +{
> + struct zpci_fib fib = {};
> +
> + fib.fmt0.aibv = args->u.reg_aen.ibv;
> + fib.fmt0.isc = args->u.reg_aen.isc;
> + fib.fmt0.noi = args->u.reg_aen.noi;
> + if (args->u.reg_aen.sb != 0) {
> + fib.fmt0.aisb = args->u.reg_aen.sb;
> + fib.fmt0.aisbo = args->u.reg_aen.sbo;
> + fib.fmt0.sum = 1;
> + } else {
> + fib.fmt0.aisb = 0;
> + fib.fmt0.aisbo = 0;
> + fib.fmt0.sum = 0;
> + }
> +
> + if (args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST)
> + return kvm_s390_pci_aif_enable(zdev, &fib, true);
> + else
> + return kvm_s390_pci_aif_enable(zdev, &fib, false);

Alternatively (just a matter of taste):

bool hostflag;
...
hostflag = (args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);

> +}
> +
> +int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
> +{
> + struct kvm_zdev *kzdev;
> + struct zpci_dev *zdev;
> + int r;
> +
> + zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
> + if (!zdev)
> + return -ENODEV;
> +
> + mutex_lock(&zdev->kzdev_lock);
> + mutex_lock(&kvm->lock);
> +
> + kzdev = zdev->kzdev;
> + if (!kzdev) {
> + r = -ENODEV;
> + goto out;
> + }
> + if (kzdev->kvm != kvm) {
> + r = -EPERM;
> + goto out;
> + }
> +
> + switch (args->op) {
> + case KVM_S390_ZPCIOP_REG_AEN:

Please also check here that args->u.reg_aen.flags does not have any bits set
that we don't support here (otherwise, this could cause some trouble when
introducing additional flags later).

> + r = kvm_s390_pci_zpci_reg_aen(zdev, args);
> + break;
> + case KVM_S390_ZPCIOP_DEREG_AEN:
> + r = kvm_s390_pci_aif_disable(zdev, false);
> + break;
> + default:
> + r = -EINVAL;
> + }
> +
> +out:
> + mutex_unlock(&kvm->lock);
> + mutex_unlock(&zdev->kzdev_lock);
> + return r;
> +}
> +
> int kvm_s390_pci_init(void)
> {
> aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
> diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
> index fb2b91b76e0c..0351382e990f 100644
> --- a/arch/s390/kvm/pci.h
> +++ b/arch/s390/kvm/pci.h
> @@ -59,6 +59,8 @@ void kvm_s390_pci_aen_exit(void);
> void kvm_s390_pci_init_list(struct kvm *kvm);
> void kvm_s390_pci_clear_list(struct kvm *kvm);
>
> +int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
> +
> int kvm_s390_pci_init(void);
> void kvm_s390_pci_exit(void);
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6a184d260c7f..1d3d41523d10 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1152,6 +1152,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_DISABLE_QUIRKS2 213
> /* #define KVM_CAP_VM_TSC_CONTROL 214 */
> #define KVM_CAP_SYSTEM_EVENT_DATA 215
> +#define KVM_CAP_S390_ZPCI_OP 216
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -2068,4 +2069,34 @@ struct kvm_stats_desc {
> /* Available with KVM_CAP_XSAVE2 */
> #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
>
> +/* Available with KVM_CAP_S390_ZPCI_OP */
> +#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd0, struct kvm_s390_zpci_op)

Please double-check whether this should be _IOWR instead (see above).

> +struct kvm_s390_zpci_op {
> + /* in */
> + __u32 fh; /* target device */
> + __u8 op; /* operation to perform */
> + __u8 pad[3];
> + union {
> + /* for KVM_S390_ZPCIOP_REG_AEN */
> + struct {
> + __u64 ibv; /* Guest addr of interrupt bit vector */
> + __u64 sb; /* Guest addr of summary bit */
> + __u32 flags;
> + __u32 noi; /* Number of interrupts */
> + __u8 isc; /* Guest interrupt subclass */
> + __u8 sbo; /* Offset of guest summary bit vector */
> + __u16 pad;
> + } reg_aen;
> + __u64 reserved[8];
> + } u;
> +};
> +
> +/* types for kvm_s390_zpci_op->op */
> +#define KVM_S390_ZPCIOP_REG_AEN 0
> +#define KVM_S390_ZPCIOP_DEREG_AEN 1
> +
> +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
> +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
> +
> #endif /* __LINUX_KVM_H */

Thomas


2022-05-17 01:15:45

by Christian Borntraeger

[permalink] [raw]
Subject: Re: [PATCH v7 20/22] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices

Am 16.05.22 um 17:35 schrieb Matthew Rosato:
> On 5/16/22 5:52 AM, Thomas Huth wrote:
>> On 13/05/2022 21.15, Matthew Rosato wrote:
>>> The KVM_S390_ZPCI_OP ioctl provides a mechanism for managing
>>> hardware-assisted virtualization features for s390X zPCI passthrough.
>>
>> s/s390X/s390x/
>>
>>> Add the first 2 operations, which can be used to enable/disable
>>> the specified device for Adapter Event Notification interpretation.
>>>
>>> Signed-off-by: Matthew Rosato <[email protected]>
>>> ---
>>>   Documentation/virt/kvm/api.rst | 45 +++++++++++++++++++
>>>   arch/s390/kvm/kvm-s390.c       | 23 ++++++++++
>>>   arch/s390/kvm/pci.c            | 81 ++++++++++++++++++++++++++++++++++
>>>   arch/s390/kvm/pci.h            |  2 +
>>>   include/uapi/linux/kvm.h       | 31 +++++++++++++
>>>   5 files changed, 182 insertions(+)
>>>
>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>> index 4a900cdbc62e..a7cd5ebce031 100644
>>> --- a/Documentation/virt/kvm/api.rst
>>> +++ b/Documentation/virt/kvm/api.rst
>>> @@ -5645,6 +5645,51 @@ enabled with ``arch_prctl()``, but this may change in the future.
>>>   The offsets of the state save areas in struct kvm_xsave follow the contents
>>>   of CPUID leaf 0xD on the host.
>>> +4.135 KVM_S390_ZPCI_OP
>>> +--------------------
>>> +
>>> +:Capability: KVM_CAP_S390_ZPCI_OP
>>> +:Architectures: s390
>>> +:Type: vcpu ioctl
>>
>> vcpu? ... you're wiring it up in  kvm_arch_vm_ioctl() later, so I assume it's rather a VM ioctl?
>
> Yup, VM ioctl, bad copy/paste job...

Can you maybe resend just the 1 or 2 patches with feedback? In the end this series might be "old" and
good enough to still be queued for the next merge window.
Would be good if Jason (Gunthorpe) would double check that his concerns are addressed and I will
have a look at the patches without RB/ACK.

2022-05-18 09:23:39

by Thomas Huth

[permalink] [raw]
Subject: Re: [PATCH v7 20/22] KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices

On 16/05/2022 17.35, Matthew Rosato wrote:
> On 5/16/22 5:52 AM, Thomas Huth wrote:
>> On 13/05/2022 21.15, Matthew Rosato wrote:
>>> The KVM_S390_ZPCI_OP ioctl provides a mechanism for managing
>>> hardware-assisted virtualization features for s390X zPCI passthrough.
>>
>> s/s390X/s390x/
>>
>>> Add the first 2 operations, which can be used to enable/disable
>>> the specified device for Adapter Event Notification interpretation.
>>>
>>> Signed-off-by: Matthew Rosato <[email protected]>
>>> ---
>>>   Documentation/virt/kvm/api.rst | 45 +++++++++++++++++++
>>>   arch/s390/kvm/kvm-s390.c       | 23 ++++++++++
>>>   arch/s390/kvm/pci.c            | 81 ++++++++++++++++++++++++++++++++++
>>>   arch/s390/kvm/pci.h            |  2 +
>>>   include/uapi/linux/kvm.h       | 31 +++++++++++++
>>>   5 files changed, 182 insertions(+)
>>>
>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>> index 4a900cdbc62e..a7cd5ebce031 100644
>>> --- a/Documentation/virt/kvm/api.rst
>>> +++ b/Documentation/virt/kvm/api.rst
>>> @@ -5645,6 +5645,51 @@ enabled with ``arch_prctl()``, but this may change
>>> in the future.
>>>   The offsets of the state save areas in struct kvm_xsave follow the
>>> contents
>>>   of CPUID leaf 0xD on the host.
>>> +4.135 KVM_S390_ZPCI_OP
>>> +--------------------
>>> +
>>> +:Capability: KVM_CAP_S390_ZPCI_OP
>>> +:Architectures: s390
>>> +:Type: vcpu ioctl
>>
>> vcpu? ... you're wiring it up in  kvm_arch_vm_ioctl() later, so I assume
>> it's rather a VM ioctl?
>
> Yup, VM ioctl, bad copy/paste job...
>
>>
>>> +:Parameters: struct kvm_s390_zpci_op (in)
>>> +:Returns: 0 on success, <0 on error
>>> +
>>> +Used to manage hardware-assisted virtualization features for zPCI devices.
>>> +
>>> +Parameters are specified via the following structure::
>>> +
>>> +  struct kvm_s390_zpci_op {
>>> +    /* in */
>>
>> If all is "in", why is there a copy_to_user() in the code later?
>>
>
> Oh no, this is a leftover from a prior version...  Good catch.  There should
> no longer be a copy_to_user.
>
>>> +    __u32 fh;        /* target device */
>>> +    __u8  op;        /* operation to perform */
>>> +    __u8  pad[3];
>>> +    union {
>>> +        /* for KVM_S390_ZPCIOP_REG_AEN */
>>> +        struct {
>>> +            __u64 ibv;    /* Guest addr of interrupt bit vector */
>>> +            __u64 sb;    /* Guest addr of summary bit */
>>
>> If this is really a vcpu ioctl, what kind of addresses are you talking
>> about here? virtual addresses? real addresses? absolute addresses?
>
> It's a VM ioctl.  These are guest kernel physical addresses that are later
> pinned in arch/s390/kvm/pci.c:kvm_s390_pci_aif_enable() as part of handling
> the ioctl.
>
>>
>>> +            __u32 flags;
>>> +            __u32 noi;    /* Number of interrupts */
>>> +            __u8 isc;    /* Guest interrupt subclass */
>>> +            __u8 sbo;    /* Offset of guest summary bit vector */
>>> +            __u16 pad;
>>> +        } reg_aen;
>>> +        __u64 reserved[8];
>>> +    } u;
>>> +  };
>>> +
>>> +The type of operation is specified in the "op" field.
>>> +KVM_S390_ZPCIOP_REG_AEN is used to register the VM for adapter event
>>> +notification interpretation, which will allow firmware delivery of adapter
>>> +events directly to the vm, with KVM providing a backup delivery mechanism;
>>> +KVM_S390_ZPCIOP_DEREG_AEN is used to subsequently disable interpretation of
>>> +adapter event notifications.
>>> +
>>> +The target zPCI function must also be specified via the "fh" field. For the
>>> +KVM_S390_ZPCIOP_REG_AEN operation, additional information to establish
>>> firmware
>>> +delivery must be provided via the "reg_aen" struct.
>>> +
>>> +The "reserved" field is meant for future extensions.
>>
>> Maybe also mention the "pad" fields? And add should these also be
>> initialized to 0 by the calling userspace program?
>
> Sure, I can mention them.  And yes, I agree that userspace should initialize
> them to 0, I'll update the QEMU series accordingly.

I just spotted the corresponding patch in the QEMU series, and I think it
should already be fine there, since you're using "= { ... }" while declaring
the variables:

+int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev)
+{
+ struct kvm_s390_zpci_op args = {
+ .fh = pbdev->fh,
+ .op = KVM_S390_ZPCIOP_DEREG_AEN
+ };

That means unspecified fields will be set to 0 by the compiler already, as
far as I know.

Thomas