2022-01-14 23:07:06

by Matthew Rosato

[permalink] [raw]
Subject: [PATCH v2 19/30] KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding

These routines will be wired into the vfio_pci_zdev ioctl handlers to
respond to requests to enable / disable a device for Adapter Event
Notifications / Adapter Interruption Forwarding.

Signed-off-by: Matthew Rosato <[email protected]>
---
arch/s390/include/asm/kvm_pci.h | 7 ++
arch/s390/kvm/pci.c | 203 ++++++++++++++++++++++++++++++++
arch/s390/pci/pci_insn.c | 1 +
3 files changed, 211 insertions(+)

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index 072401aa7922..01fe14fffd7a 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -16,16 +16,23 @@
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/pci.h>
+#include <asm/pci_insn.h>

struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
+ struct zpci_fib fib;
};

int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
void kvm_s390_pci_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);

+int kvm_s390_pci_aif_probe(struct zpci_dev *zdev);
+int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
+ bool assist);
+int kvm_s390_pci_aif_disable(struct zpci_dev *zdev);
+
int kvm_s390_pci_interp_probe(struct zpci_dev *zdev);
int kvm_s390_pci_interp_enable(struct zpci_dev *zdev);
int kvm_s390_pci_interp_disable(struct zpci_dev *zdev);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 122d0992b521..7ed9abc476b6 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -12,6 +12,7 @@
#include <asm/kvm_pci.h>
#include <asm/pci.h>
#include <asm/pci_insn.h>
+#include <asm/pci_io.h>
#include <asm/sclp.h>
#include "pci.h"
#include "kvm-s390.h"
@@ -145,6 +146,204 @@ int kvm_s390_pci_aen_init(u8 nisc)
return rc;
}

+/* Modify PCI: Register floating adapter interruption forwarding */
+static int kvm_zpci_set_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
+ fib.fmt0.sum = 1; /* enable summary notifications */
+ fib.fmt0.noi = airq_iv_end(zdev->aibv);
+ fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+ fib.fmt0.aibvo = 0;
+ fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib.fmt0.aisbo = zdev->aisb & 63;
+ fib.gd = zdev->gd;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister floating adapter interruption forwarding */
+static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ fib.gd = zdev->gd;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+int kvm_s390_pci_aif_probe(struct zpci_dev *zdev)
+{
+ /* Must have appropriate hardware facilities */
+ if (!(sclp.has_aeni && test_facility(71)))
+ return -EINVAL;
+
+ /* Must have a KVM association registered */
+ if (!zdev->kzdev || !zdev->kzdev->kvm)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_probe);
+
+int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
+ bool assist)
+{
+ struct page *aibv_page, *aisb_page = NULL;
+ unsigned int msi_vecs, idx;
+ struct zpci_gaite *gaite;
+ unsigned long bit;
+ struct kvm *kvm;
+ phys_addr_t gaddr;
+ int rc = 0;
+
+ /*
+ * Interrupt forwarding is only applicable if the device is already
+ * enabled for interpretation
+ */
+ if (zdev->gd == 0)
+ return -EINVAL;
+
+ kvm = zdev->kzdev->kvm;
+ msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
+
+ /* Replace AIBV address */
+ idx = srcu_read_lock(&kvm->srcu);
+ aibv_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
+ srcu_read_unlock(&kvm->srcu, idx);
+ if (is_error_page(aibv_page)) {
+ rc = -EIO;
+ goto out;
+ }
+ gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
+ fib->fmt0.aibv = gaddr;
+
+ /* Pin the guest AISB if one was specified */
+ if (fib->fmt0.sum == 1) {
+ idx = srcu_read_lock(&kvm->srcu);
+ aisb_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
+ srcu_read_unlock(&kvm->srcu, idx);
+ if (is_error_page(aisb_page)) {
+ rc = -EIO;
+ goto unpin1;
+ }
+ }
+
+ /* AISB must be allocated before we can fill in GAITE */
+ mutex_lock(&aift->lock);
+ bit = airq_iv_alloc_bit(aift->sbv);
+ if (bit == -1UL)
+ goto unpin2;
+ zdev->aisb = bit;
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
+ AIRQ_IV_BITLOCK |
+ AIRQ_IV_GUESTVEC,
+ (unsigned long *)fib->fmt0.aibv);
+
+ spin_lock_irq(&aift->gait_lock);
+ gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+ sizeof(struct zpci_gaite));
+
+ /* If assist not requested, host will get all alerts */
+ if (assist)
+ gaite->gisa = (u32)(u64)&kvm->arch.sie_page2->gisa;
+ else
+ gaite->gisa = 0;
+
+ gaite->gisc = fib->fmt0.isc;
+ gaite->count++;
+ gaite->aisbo = fib->fmt0.aisbo;
+ gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
+ ~PAGE_MASK));
+ aift->kzdev[zdev->aisb] = zdev->kzdev;
+ spin_unlock_irq(&aift->gait_lock);
+
+ /* Update guest FIB for re-issue */
+ fib->fmt0.aisbo = zdev->aisb & 63;
+ fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib->fmt0.isc = kvm_s390_gisc_register(kvm, gaite->gisc);
+
+ /* Save some guest fib values in the host for later use */
+ zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
+ zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
+ mutex_unlock(&aift->lock);
+
+ /* Issue the clp to setup the irq now */
+ rc = kvm_zpci_set_airq(zdev);
+ return rc;
+
+unpin2:
+ mutex_unlock(&aift->lock);
+ if (fib->fmt0.sum == 1) {
+ gaddr = page_to_phys(aisb_page);
+ kvm_release_pfn_dirty(gaddr >> PAGE_SHIFT);
+ }
+unpin1:
+ kvm_release_pfn_dirty(fib->fmt0.aibv >> PAGE_SHIFT);
+out:
+ return rc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_enable);
+
+int kvm_s390_pci_aif_disable(struct zpci_dev *zdev)
+{
+ struct kvm_zdev *kzdev = zdev->kzdev;
+ struct zpci_gaite *gaite;
+ int rc;
+ u8 isc;
+
+ if (zdev->gd == 0)
+ return -EINVAL;
+
+ /* Even if the clear fails due to an error, clear the GAITE */
+ rc = kvm_zpci_clear_airq(zdev);
+
+ mutex_lock(&aift->lock);
+ if (zdev->kzdev->fib.fmt0.aibv == 0)
+ goto out;
+ spin_lock_irq(&aift->gait_lock);
+ gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+ sizeof(struct zpci_gaite));
+ isc = gaite->gisc;
+ gaite->count--;
+ if (gaite->count == 0) {
+ /* Release guest AIBV and AISB */
+ kvm_release_pfn_dirty(kzdev->fib.fmt0.aibv >> PAGE_SHIFT);
+ if (gaite->aisb != 0)
+ kvm_release_pfn_dirty(gaite->aisb >> PAGE_SHIFT);
+ /* Clear the GAIT entry */
+ gaite->aisb = 0;
+ gaite->gisc = 0;
+ gaite->aisbo = 0;
+ gaite->gisa = 0;
+ aift->kzdev[zdev->aisb] = 0;
+ /* Clear zdev info */
+ airq_iv_free_bit(aift->sbv, zdev->aisb);
+ airq_iv_release(zdev->aibv);
+ zdev->aisb = 0;
+ zdev->aibv = NULL;
+ }
+ spin_unlock_irq(&aift->gait_lock);
+ kvm_s390_gisc_unregister(kzdev->kvm, isc);
+ kzdev->fib.fmt0.isc = 0;
+ kzdev->fib.fmt0.aibv = 0;
+out:
+ mutex_unlock(&aift->lock);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_disable);
+
int kvm_s390_pci_interp_probe(struct zpci_dev *zdev)
{
/* Must have appropriate hardware facilities */
@@ -221,6 +420,10 @@ int kvm_s390_pci_interp_disable(struct zpci_dev *zdev)
if (zdev->gd == 0)
return -EINVAL;

+ /* Forwarding must be turned off before interpretation */
+ if (zdev->kzdev->fib.fmt0.aibv != 0)
+ kvm_s390_pci_aif_disable(zdev);
+
/* Remove the host CLP guest designation */
zdev->gd = 0;

diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index ca6399d52767..f7d0e29bbf0b 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -59,6 +59,7 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)

return cc;
}
+EXPORT_SYMBOL_GPL(zpci_mod_fc);

/* Refresh PCI Translations */
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
--
2.27.0


2022-01-25 17:01:12

by Pierre Morel

[permalink] [raw]
Subject: Re: [PATCH v2 19/30] KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding



On 1/14/22 21:31, Matthew Rosato wrote:
> These routines will be wired into the vfio_pci_zdev ioctl handlers to
> respond to requests to enable / disable a device for Adapter Event
> Notifications / Adapter Interruption Forwarding.
>
> Signed-off-by: Matthew Rosato <[email protected]>
> ---
> arch/s390/include/asm/kvm_pci.h | 7 ++
> arch/s390/kvm/pci.c | 203 ++++++++++++++++++++++++++++++++
> arch/s390/pci/pci_insn.c | 1 +
> 3 files changed, 211 insertions(+)
>
> diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
> index 072401aa7922..01fe14fffd7a 100644
> --- a/arch/s390/include/asm/kvm_pci.h
> +++ b/arch/s390/include/asm/kvm_pci.h
> @@ -16,16 +16,23 @@
> #include <linux/kvm_host.h>
> #include <linux/kvm.h>
> #include <linux/pci.h>
> +#include <asm/pci_insn.h>
>
> struct kvm_zdev {
> struct zpci_dev *zdev;
> struct kvm *kvm;
> + struct zpci_fib fib;
> };
>
> int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
> void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
> void kvm_s390_pci_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);
>
> +int kvm_s390_pci_aif_probe(struct zpci_dev *zdev);
> +int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
> + bool assist);
> +int kvm_s390_pci_aif_disable(struct zpci_dev *zdev);
> +
> int kvm_s390_pci_interp_probe(struct zpci_dev *zdev);
> int kvm_s390_pci_interp_enable(struct zpci_dev *zdev);
> int kvm_s390_pci_interp_disable(struct zpci_dev *zdev);
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index 122d0992b521..7ed9abc476b6 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -12,6 +12,7 @@
> #include <asm/kvm_pci.h>
> #include <asm/pci.h>
> #include <asm/pci_insn.h>
> +#include <asm/pci_io.h>
> #include <asm/sclp.h>
> #include "pci.h"
> #include "kvm-s390.h"
> @@ -145,6 +146,204 @@ int kvm_s390_pci_aen_init(u8 nisc)
> return rc;
> }
>
> +/* Modify PCI: Register floating adapter interruption forwarding */
> +static int kvm_zpci_set_airq(struct zpci_dev *zdev)
> +{
> + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
> + struct zpci_fib fib = {0};

I prefer {} instead of {0}; even if it does the same, {0} looks wrong to me.

> + u8 status;
> +
> + fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
> + fib.fmt0.sum = 1; /* enable summary notifications */
> + fib.fmt0.noi = airq_iv_end(zdev->aibv);
> + fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
> + fib.fmt0.aibvo = 0;
> + fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
> + fib.fmt0.aisbo = zdev->aisb & 63;
> + fib.gd = zdev->gd;
> +
> + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
> +}
> +
> +/* Modify PCI: Unregister floating adapter interruption forwarding */
> +static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
> +{
> + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
> + struct zpci_fib fib = {0};

same here

> + u8 cc, status;
> +
> + fib.gd = zdev->gd;
> +
> + cc = zpci_mod_fc(req, &fib, &status);
> + if (cc == 3 || (cc == 1 && status == 24))
> + /* Function already gone or IRQs already deregistered. */
> + cc = 0;
> +
> + return cc ? -EIO : 0;
> +}
> +
> +int kvm_s390_pci_aif_probe(struct zpci_dev *zdev)
> +{
> + /* Must have appropriate hardware facilities */
> + if (!(sclp.has_aeni && test_facility(71)))
> + return -EINVAL;
> +
> + /* Must have a KVM association registered */
> + if (!zdev->kzdev || !zdev->kzdev->kvm)
> + return -EINVAL;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_probe);
> +
> +int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
> + bool assist)
> +{
> + struct page *aibv_page, *aisb_page = NULL;
> + unsigned int msi_vecs, idx;
> + struct zpci_gaite *gaite;
> + unsigned long bit;
> + struct kvm *kvm;
> + phys_addr_t gaddr;
> + int rc = 0;
> +
> + /*
> + * Interrupt forwarding is only applicable if the device is already
> + * enabled for interpretation
> + */
> + if (zdev->gd == 0)
> + return -EINVAL;
> +
> + kvm = zdev->kzdev->kvm;
> + msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
> +
> + /* Replace AIBV address */
> + idx = srcu_read_lock(&kvm->srcu);
> + aibv_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
> + srcu_read_unlock(&kvm->srcu, idx);
> + if (is_error_page(aibv_page)) {
> + rc = -EIO;
> + goto out;
> + }
> + gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
> + fib->fmt0.aibv = gaddr;
> +
> + /* Pin the guest AISB if one was specified */
> + if (fib->fmt0.sum == 1) {
> + idx = srcu_read_lock(&kvm->srcu);
> + aisb_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
> + srcu_read_unlock(&kvm->srcu, idx);
> + if (is_error_page(aisb_page)) {
> + rc = -EIO;
> + goto unpin1;
> + }
> + }
> +
> + /* AISB must be allocated before we can fill in GAITE */
> + mutex_lock(&aift->lock);
> + bit = airq_iv_alloc_bit(aift->sbv);
> + if (bit == -1UL)
> + goto unpin2;
> + zdev->aisb = bit;

aisb here is the aisb offset, right?
Then maybe add a comment, since in the gait and in fmt0 aisb is an address.

> + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
> + AIRQ_IV_BITLOCK |
> + AIRQ_IV_GUESTVEC,
> + (unsigned long *)fib->fmt0.aibv);

phys_to_virt ?

> +
> + spin_lock_irq(&aift->gait_lock);
> + gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
> + sizeof(struct zpci_gaite));
> +
> + /* If assist not requested, host will get all alerts */
> + if (assist)
> + gaite->gisa = (u32)(u64)&kvm->arch.sie_page2->gisa;

virt_to_phys ?

> + else
> + gaite->gisa = 0;
> +
> + gaite->gisc = fib->fmt0.isc;
> + gaite->count++;
> + gaite->aisbo = fib->fmt0.aisbo;
> + gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
> + ~PAGE_MASK));
> + aift->kzdev[zdev->aisb] = zdev->kzdev;
> + spin_unlock_irq(&aift->gait_lock);
> +
> + /* Update guest FIB for re-issue */
> + fib->fmt0.aisbo = zdev->aisb & 63;
> + fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
> + fib->fmt0.isc = kvm_s390_gisc_register(kvm, gaite->gisc);
> +
> + /* Save some guest fib values in the host for later use */
> + zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
> + zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
> + mutex_unlock(&aift->lock);
> +
> + /* Issue the clp to setup the irq now */
> + rc = kvm_zpci_set_airq(zdev);
> + return rc;
> +
> +unpin2:
> + mutex_unlock(&aift->lock);
> + if (fib->fmt0.sum == 1) {
> + gaddr = page_to_phys(aisb_page);
> + kvm_release_pfn_dirty(gaddr >> PAGE_SHIFT);
> + }
> +unpin1:
> + kvm_release_pfn_dirty(fib->fmt0.aibv >> PAGE_SHIFT);
> +out:
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_enable);
> +
> +int kvm_s390_pci_aif_disable(struct zpci_dev *zdev)
> +{
> + struct kvm_zdev *kzdev = zdev->kzdev;
> + struct zpci_gaite *gaite;
> + int rc;
> + u8 isc;
> +
> + if (zdev->gd == 0)
> + return -EINVAL;
> +
> + /* Even if the clear fails due to an error, clear the GAITE */
> + rc = kvm_zpci_clear_airq(zdev);

Having a look at kvm_zpci_clear_airq(), the only possible error seems to
occur when an error recovery is in progress.
The errors returned by the instruction for a wrong FH, for a function
that does not exist anymore, or for interrupt vectors that are already
deregistered are reported as success by the function.

How can we be sure that we have no conflict with a recovery in progress?
Shouldn't we in this case let the recovery process handle the function
and stop here?

Doesn't taking the aift->lock mutex after, and not before, the
kvm_zpci_clear_airq() call open the door to a race condition with the recovery?

> +
> + mutex_lock(&aift->lock);
> + if (zdev->kzdev->fib.fmt0.aibv == 0)
> + goto out;
> + spin_lock_irq(&aift->gait_lock);
> + gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
> + sizeof(struct zpci_gaite));
> + isc = gaite->gisc;
> + gaite->count--;
> + if (gaite->count == 0) {
> + /* Release guest AIBV and AISB */
> + kvm_release_pfn_dirty(kzdev->fib.fmt0.aibv >> PAGE_SHIFT);
> + if (gaite->aisb != 0)
> + kvm_release_pfn_dirty(gaite->aisb >> PAGE_SHIFT);
> + /* Clear the GAIT entry */
> + gaite->aisb = 0;
> + gaite->gisc = 0;
> + gaite->aisbo = 0;
> + gaite->gisa = 0;
> + aift->kzdev[zdev->aisb] = 0;
> + /* Clear zdev info */
> + airq_iv_free_bit(aift->sbv, zdev->aisb);
> + airq_iv_release(zdev->aibv);
> + zdev->aisb = 0;
> + zdev->aibv = NULL;
> + }
> + spin_unlock_irq(&aift->gait_lock);
> + kvm_s390_gisc_unregister(kzdev->kvm, isc);

Don't we need to check the return value?
And maybe to report it to the caller?

> + kzdev->fib.fmt0.isc = 0;
> + kzdev->fib.fmt0.aibv = 0;
> +out:
> + mutex_unlock(&aift->lock);
> +
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_disable);
> +
> int kvm_s390_pci_interp_probe(struct zpci_dev *zdev)
> {
> /* Must have appropriate hardware facilities */
> @@ -221,6 +420,10 @@ int kvm_s390_pci_interp_disable(struct zpci_dev *zdev)
> if (zdev->gd == 0)
> return -EINVAL;
>
> + /* Forwarding must be turned off before interpretation */
> + if (zdev->kzdev->fib.fmt0.aibv != 0)
> + kvm_s390_pci_aif_disable(zdev);
> +
> /* Remove the host CLP guest designation */
> zdev->gd = 0;
>
> diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
> index ca6399d52767..f7d0e29bbf0b 100644
> --- a/arch/s390/pci/pci_insn.c
> +++ b/arch/s390/pci/pci_insn.c
> @@ -59,6 +59,7 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
>
> return cc;
> }
> +EXPORT_SYMBOL_GPL(zpci_mod_fc);
>
> /* Refresh PCI Translations */
> static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
>

--
Pierre Morel
IBM Lab Boeblingen

2022-01-25 22:50:52

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH v2 19/30] KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding

On 1/25/22 7:41 AM, Pierre Morel wrote:
>
>
> On 1/14/22 21:31, Matthew Rosato wrote:
...
>> +/* Modify PCI: Register floating adapter interruption forwarding */
>> +static int kvm_zpci_set_airq(struct zpci_dev *zdev)
>> +{
>> +    u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
>> +    struct zpci_fib fib = {0};
>
> I prefer {} instead of {0} even it does the same it looks wrong to me.
>

OK

...

>> +int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
>> +                bool assist)
>> +{
>> +    struct page *aibv_page, *aisb_page = NULL;
>> +    unsigned int msi_vecs, idx;
>> +    struct zpci_gaite *gaite;
>> +    unsigned long bit;
>> +    struct kvm *kvm;
>> +    phys_addr_t gaddr;
>> +    int rc = 0;
>> +
>> +    /*
>> +     * Interrupt forwarding is only applicable if the device is already
>> +     * enabled for interpretation
>> +     */
>> +    if (zdev->gd == 0)
>> +        return -EINVAL;
>> +
>> +    kvm = zdev->kzdev->kvm;
>> +    msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
>> +
>> +    /* Replace AIBV address */
>> +    idx = srcu_read_lock(&kvm->srcu);
>> +    aibv_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
>> +    srcu_read_unlock(&kvm->srcu, idx);
>> +    if (is_error_page(aibv_page)) {
>> +        rc = -EIO;
>> +        goto out;
>> +    }
>> +    gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
>> +    fib->fmt0.aibv = gaddr;
>> +
>> +    /* Pin the guest AISB if one was specified */
>> +    if (fib->fmt0.sum == 1) {
>> +        idx = srcu_read_lock(&kvm->srcu);
>> +        aisb_page = gfn_to_page(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
>> +        srcu_read_unlock(&kvm->srcu, idx);
>> +        if (is_error_page(aisb_page)) {
>> +            rc = -EIO;
>> +            goto unpin1;
>> +        }
>> +    }
>> +
>> +    /* AISB must be allocated before we can fill in GAITE */
>> +    mutex_lock(&aift->lock);
>> +    bit = airq_iv_alloc_bit(aift->sbv);
>> +    if (bit == -1UL)
>> +        goto unpin2;
>> +    zdev->aisb = bit;
>
> aisb here is the aisb offset right?

Yes

> Then may be add a comment as in gait and fmt0 aisb is an address.

Sure, good point

>
>> +    zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
>> +                          AIRQ_IV_BITLOCK |
>> +                          AIRQ_IV_GUESTVEC,
>> +                    (unsigned long *)fib->fmt0.aibv);
>
> phys_to_virt ?

Ugh, yep -- we just put the physical address in fib->fmt0.aibv a few
lines earlier via page_to_phys

>
>> +
>> +    spin_lock_irq(&aift->gait_lock);
>> +    gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
>> +                           sizeof(struct zpci_gaite));
>> +
>> +    /* If assist not requested, host will get all alerts */
>> +    if (assist)
>> +        gaite->gisa = (u32)(u64)&kvm->arch.sie_page2->gisa;
>
> virt_to_phys ?

Yes

>
>> +    else
>> +        gaite->gisa = 0;
>> +
>> +    gaite->gisc = fib->fmt0.isc;
>> +    gaite->count++;
>> +    gaite->aisbo = fib->fmt0.aisbo;
>> +    gaite->aisb = virt_to_phys(page_address(aisb_page) +
>> (fib->fmt0.aisb &
>> +                                  ~PAGE_MASK));
>> +    aift->kzdev[zdev->aisb] = zdev->kzdev;
>> +    spin_unlock_irq(&aift->gait_lock);
>> +
>> +    /* Update guest FIB for re-issue */
>> +    fib->fmt0.aisbo = zdev->aisb & 63;
>> +    fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb /
>> 64) * 8);
>> +    fib->fmt0.isc = kvm_s390_gisc_register(kvm, gaite->gisc);
>> +
>> +    /* Save some guest fib values in the host for later use */
>> +    zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
>> +    zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
>> +    mutex_unlock(&aift->lock);
>> +
>> +    /* Issue the clp to setup the irq now */
>> +    rc = kvm_zpci_set_airq(zdev);
>> +    return rc;
>> +
>> +unpin2:
>> +    mutex_unlock(&aift->lock);
>> +    if (fib->fmt0.sum == 1) {
>> +        gaddr = page_to_phys(aisb_page);
>> +        kvm_release_pfn_dirty(gaddr >> PAGE_SHIFT);
>> +    }
>> +unpin1:
>> +    kvm_release_pfn_dirty(fib->fmt0.aibv >> PAGE_SHIFT);
>> +out:
>> +    return rc;
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_enable);
>> +
>> +int kvm_s390_pci_aif_disable(struct zpci_dev *zdev)
>> +{
>> +    struct kvm_zdev *kzdev = zdev->kzdev;
>> +    struct zpci_gaite *gaite;
>> +    int rc;
>> +    u8 isc;
>> +
>> +    if (zdev->gd == 0)
>> +        return -EINVAL;
>> +
>> +    /* Even if the clear fails due to an error, clear the GAITE */
>> +    rc = kvm_zpci_clear_airq(zdev);
>
> Having a look at kvm_zpci_clear_airq() the only possible error seems to
> be when an error recovery is in progress.
> The error returned for a wrong FH, function does not exist anymore, or
> if the interrupt vectors are already deregistered by the instruction are
> returned as success by the function.
>
> How can we be sure that we have no conflict with a recovery in progress?
> Shouldn't we in this case let the recovery process handle the function
> and stop here?

Hmm -- So I think for a userspace-initiated call to this routine, yes.
We could then assume recovery takes care of things. However, we also
call this routine from vfio-pci core when closing the device...

So then let's look at how this would work -- the current recovery action
for passthrough is always PCI_ERS_RESULT_DISCONNECT. The process of
disconnecting the device will trigger vfio-pci to close its device,
which in turn will trigger vfio_pci_zdev_release(), which will in turn
also call kvm_s390_pci_aif_disable() as part of cleanup. However, in this
case we want to clear the GAITE anyway, even if kvm_zpci_clear_airq(zdev)
fails, because we know the device is for sure going away.

I think I need some sort of input to this routine that indicates we must
cleanup (bool force or something) which would only be specified by the
call from vfio_pci_zdev_release().

>
> Doesn't the aif lock mutex placed after and not before the clear_irq
> open a door for race condition with the recovery?

Good point.

>
>> +
>> +    mutex_lock(&aift->lock);
>> +    if (zdev->kzdev->fib.fmt0.aibv == 0)
>> +        goto out;
>> +    spin_lock_irq(&aift->gait_lock);
>> +    gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
>> +                           sizeof(struct zpci_gaite));
>> +    isc = gaite->gisc;
>> +    gaite->count--;
>> +    if (gaite->count == 0) {
>> +        /* Release guest AIBV and AISB */
>> +        kvm_release_pfn_dirty(kzdev->fib.fmt0.aibv >> PAGE_SHIFT);
>> +        if (gaite->aisb != 0)
>> +            kvm_release_pfn_dirty(gaite->aisb >> PAGE_SHIFT);
>> +        /* Clear the GAIT entry */
>> +        gaite->aisb = 0;
>> +        gaite->gisc = 0;
>> +        gaite->aisbo = 0;
>> +        gaite->gisa = 0;
>> +        aift->kzdev[zdev->aisb] = 0;
>> +        /* Clear zdev info */
>> +        airq_iv_free_bit(aift->sbv, zdev->aisb);
>> +        airq_iv_release(zdev->aibv);
>> +        zdev->aisb = 0;
>> +        zdev->aibv = NULL;
>> +    }
>> +    spin_unlock_irq(&aift->gait_lock);
>> +    kvm_s390_gisc_unregister(kzdev->kvm, isc);
>
> Don't we need to check the return value?
> And maybe to report it to the caller?

Well, actually, I think we really need to look at the
kvm_s390_gisc_register() call during aif_enable -- I unconditionally
assigned it to the fib when in fact it can also return a negative error
value (which I never check for) -- so I will re-arrange the code in
aif_enable() to do that earlier using a local variable and leave on
error in aif_enable if this fails.

kvm_s390_gisc_register() returns 2 possible errors, which are shared
with gisc_unregister -- So with that change we will detect these errors
(not using GISA, bad guest ISC) at aif_enable time.

So then for gisc_unregister we should really only possibly hit the 3rd
error (guest ISC is not registered). And if for some reason we hit that
error at disable time, well, that's weird and unexpected (s390dbf?) but
as far as userspace is concerned the GAITE is cleared and the gisc is
unregistered, so I think we want to return success still to userspace.
But we must do the checking at gisc_register() time and fail for the
other cases there.