These routines will be invoked at the time an s390x vfio-pci device is
associated with a KVM (or when the association is removed), allowing
the zPCI device to enable or disable load/store interpretation mode;
this requires the host zPCI device to inform firmware of the unique
token (GISA designation) that is associated with the owning KVM.
Furthermore, add/remove these devices from a list associated with the
kvm and ensure proper cleanup always occurs during vm exit.
Signed-off-by: Matthew Rosato <[email protected]>
---
arch/s390/include/asm/kvm_host.h | 11 +++
arch/s390/kvm/kvm-s390.c | 5 +
arch/s390/kvm/pci.c | 163 +++++++++++++++++++++++++++++++
arch/s390/kvm/pci.h | 5 +
arch/s390/pci/pci.c | 3 +
5 files changed, 187 insertions(+)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8e381603b6a7..ef3364af6b34 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -967,6 +967,8 @@ struct kvm_arch{
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
struct kvm_s390_pv pv;
+ struct list_head kzdev_list;
+ spinlock_t kzdev_list_lock;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -1017,4 +1019,13 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+#ifdef CONFIG_PCI
+int kvm_s390_pci_register_kvm(struct device *dev, void *data);
+#else
+static inline int kvm_s390_pci_register_kvm(struct device *dev, void *data)
+{
+ return -EPERM;
+}
+#endif
+
#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 327649ddddce..704d85214f4f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2864,6 +2864,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_crypto_init(kvm);
+ if (IS_ENABLED(CONFIG_VFIO_PCI))
+ kvm_s390_pci_init_list(kvm);
+
mutex_init(&kvm->arch.float_int.ais_lock);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -2949,6 +2952,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
+ if (IS_ENABLED(CONFIG_VFIO_PCI))
+ kvm_s390_pci_clear_list(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index f0fd68569a9d..66565f5f3f43 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -12,7 +12,9 @@
#include <asm/pci.h>
#include <asm/pci_insn.h>
#include <asm/pci_io.h>
+#include <asm/sclp.h>
#include "pci.h"
+#include "kvm-s390.h"
struct zpci_aift *aift;
@@ -425,6 +427,167 @@ void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(kvm_s390_pci_dev_release);
+static inline int register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+{
+ int rc;
+
+ if (zdev->kzdev || zdev->gisa != 0)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ rc = kvm_s390_pci_dev_open(zdev);
+ if (rc)
+ goto err;
+
+ /*
+ * If interpretation facilities aren't available, add the device to
+ * the kzdev list but don't enable for interpretation.
+ */
+ if (!kvm_s390_pci_interp_allowed())
+ goto out;
+
+ /*
+ * If this is the first request to use an interpreted device, make the
+ * necessary vcpu changes
+ */
+ if (!kvm->arch.use_zpci_interp)
+ kvm_s390_vcpu_pci_enable_interp(kvm);
+
+ if (zdev_enabled(zdev)) {
+ rc = zpci_disable_device(zdev);
+ if (rc)
+ goto err;
+ }
+
+ /*
+ * Store information about the identity of the kvm guest allowed to
+ * access this device via interpretation to be used by host CLP
+ */
+ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ goto clear_gisa;
+
+ /* Re-register the IOMMU that was already created */
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table));
+ if (rc)
+ goto clear_gisa;
+
+out:
+ zdev->kzdev->kvm = kvm;
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+
+clear_gisa:
+ zdev->gisa = 0;
+err:
+ if (zdev->kzdev)
+ kvm_s390_pci_dev_release(zdev);
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
+static inline int unregister_kvm(struct zpci_dev *zdev)
+{
+ struct kvm *kvm;
+ int rc;
+
+ if (!zdev->kzdev)
+ return -EINVAL;
+
+ kvm = zdev->kzdev->kvm;
+ mutex_lock(&kvm->lock);
+
+ /*
+ * A 0 gisa means interpretation was never enabled, just remove the
+ * device from the list.
+ */
+ if (zdev->gisa == 0)
+ goto out;
+
+ /* Forwarding must be turned off before interpretation */
+ if (zdev->kzdev->fib.fmt0.aibv != 0)
+ kvm_s390_pci_aif_disable(zdev, true);
+
+ /* Remove the host CLP guest designation */
+ zdev->gisa = 0;
+
+ if (zdev_enabled(zdev)) {
+ rc = zpci_disable_device(zdev);
+ if (rc)
+ goto out;
+ }
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ goto out;
+
+ /* Re-register the IOMMU that was already created */
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table));
+
+out:
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_del(&zdev->kzdev->entry);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+ kvm_s390_pci_dev_release(zdev);
+
+ mutex_unlock(&kvm->lock);
+
+ return rc;
+}
+
+int kvm_s390_pci_register_kvm(struct device *dev, void *data)
+{
+ struct zpci_dev *zdev = NULL;
+ struct kvm *kvm = data;
+
+ /* Only proceed for zPCI devices, quietly ignore others */
+ if (dev_is_pci(dev))
+ zdev = to_zpci_dev(dev);
+ if (!zdev)
+ return 0;
+
+ /*
+	 * Register all devices with this KVM. If interpretation facilities
+ * are available, enable them and let userspace indicate whether or
+ * not they will be used (specify SHM bit to disable).
+ */
+ if (kvm)
+ return register_kvm(zdev, kvm);
+ else
+ return unregister_kvm(zdev);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
+
+void kvm_s390_pci_init_list(struct kvm *kvm)
+{
+ spin_lock_init(&kvm->arch.kzdev_list_lock);
+ INIT_LIST_HEAD(&kvm->arch.kzdev_list);
+}
+
+void kvm_s390_pci_clear_list(struct kvm *kvm)
+{
+ struct kvm_zdev *tmp, *kzdev;
+ LIST_HEAD(remove);
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_for_each_entry_safe(kzdev, tmp, &kvm->arch.kzdev_list, entry)
+ list_move_tail(&kzdev->entry, &remove);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ list_for_each_entry_safe(kzdev, tmp, &remove, entry)
+ unregister_kvm(kzdev->zdev);
+}
+
int kvm_s390_pci_init(void)
{
aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
index b4bf3d1d4b66..cb5ec3208923 100644
--- a/arch/s390/kvm/pci.h
+++ b/arch/s390/kvm/pci.h
@@ -13,6 +13,7 @@
#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <linux/mutex.h>
+#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/airq.h>
#include <asm/cpu.h>
@@ -21,6 +22,7 @@ struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
struct zpci_fib fib;
+ struct list_head entry;
};
struct zpci_gaite {
@@ -54,6 +56,9 @@ static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
int kvm_s390_pci_aen_init(u8 nisc);
void kvm_s390_pci_aen_exit(void);
+void kvm_s390_pci_init_list(struct kvm *kvm);
+void kvm_s390_pci_clear_list(struct kvm *kvm);
+
int kvm_s390_pci_init(void);
static inline bool kvm_s390_pci_interp_allowed(void)
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index f0a439c43395..d9b021fb84d5 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -132,6 +132,7 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
return cc;
}
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
/* Modify PCI: Unregister I/O address translation parameters */
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
@@ -712,6 +713,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
zpci_update_fh(zdev, fh);
return rc;
}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
int zpci_disable_device(struct zpci_dev *zdev)
{
@@ -735,6 +737,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
}
return rc;
}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
/**
* zpci_hot_reset_device - perform a reset of the given zPCI function
--
2.27.0
On Mon, Apr 04, 2022 at 01:43:43PM -0400, Matthew Rosato wrote:
> +int kvm_s390_pci_register_kvm(struct device *dev, void *data)
> +{
> + struct zpci_dev *zdev = NULL;
> + struct kvm *kvm = data;
> +
> + /* Only proceed for zPCI devices, quietly ignore others */
> + if (dev_is_pci(dev))
> + zdev = to_zpci_dev(dev);
> + if (!zdev)
> + return 0;
Especially since this only works if we have zpci device
So having the zpci code hook the kvm notifier and then call the arch
code from the zpci area seems pretty OK
Also why is a struct kvm * being passed as a void *?
Jason
On 4/8/22 8:47 AM, Jason Gunthorpe wrote:
> On Mon, Apr 04, 2022 at 01:43:43PM -0400, Matthew Rosato wrote:
>> +int kvm_s390_pci_register_kvm(struct device *dev, void *data)
>> +{
>> + struct zpci_dev *zdev = NULL;
>> + struct kvm *kvm = data;
>> +
>> + /* Only proceed for zPCI devices, quietly ignore others */
>> + if (dev_is_pci(dev))
>> + zdev = to_zpci_dev(dev);
>> + if (!zdev)
>> + return 0;
>
> Especially since this only works if we have zpci device
>
> So having the zpci code hook the kvm notifier and then call the arch
> code from the zpci area seems pretty OK
>
> Also why is a struct kvm * being passed as a void *?
Only because the function is intended to be called via
iommu_group_for_each_dev (next patch) which requires int (*fn)(struct
device *, void *)
On Tue, Apr 12, 2022 at 09:14:36AM -0400, Matthew Rosato wrote:
> On 4/8/22 8:47 AM, Jason Gunthorpe wrote:
> > On Mon, Apr 04, 2022 at 01:43:43PM -0400, Matthew Rosato wrote:
> > > +int kvm_s390_pci_register_kvm(struct device *dev, void *data)
> > > +{
> > > + struct zpci_dev *zdev = NULL;
> > > + struct kvm *kvm = data;
> > > +
> > > + /* Only proceed for zPCI devices, quietly ignore others */
> > > + if (dev_is_pci(dev))
> > > + zdev = to_zpci_dev(dev);
> > > + if (!zdev)
> > > + return 0;
> >
> > Especially since this only works if we have zpci device
> >
> > So having the zpci code hook the kvm notifier and then call the arch
> > code from the zpci area seems pretty OK
> >
> > Also why is a struct kvm * being passed as a void *?
>
> Only because the function is intended to be called via
> iommu_group_for_each_dev (next patch) which requires int (*fn)(struct device
> *, void *)
I think this further says this should be called from vfio on the
actual struct device that is assigned to the KVM, not try to deduce it
from the group.
Jason