2023-06-12 06:07:35

by Anup Patel

[permalink] [raw]
Subject: [PATCH v2 00/10] RISC-V KVM in-kernel AIA irqchip

This series adds in-kernel AIA irqchip to KVM RISC-V which trap-n-emulate
IMSIC and APLIC MSI-mode for Guest. The APLIC MSI-mode trap-n-emulate is
optional so KVM user space can emulate APLIC entirely in user space.

The use of IMSIC HW guest files to accelerate IMSIC virtualization will be
done as separate series since this depends on AIA irqchip drivers to be
upstreamed. This series has no dependency on the AIA irqchip drivers.

There is also a KVM AIA irq-bypass (or device MSI virtualization) series
under development which depends on this series and upcoming IOMMU driver
series.

These patches (or this series) can also be found in the
riscv_kvm_aia_irqchip_v2 branch at: https://github.com/avpatel/linux.git

Changes since v1:
- Rebased on Linux-6.4-rc6
- Addressed Atish's comment in PATCH6
- Added comments in arch/riscv/include/uapi/asm/kvm.h about APLIC and
IMSIC device attribute type

Anup Patel (10):
RISC-V: KVM: Implement guest external interrupt line management
RISC-V: KVM: Add IMSIC related defines
RISC-V: KVM: Add APLIC related defines
RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
RISC-V: KVM: Skeletal in-kernel AIA irqchip support
RISC-V: KVM: Implement device interface for AIA irqchip
RISC-V: KVM: Add in-kernel emulation of AIA APLIC
RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip

arch/riscv/include/asm/kvm_aia.h | 107 ++-
arch/riscv/include/asm/kvm_aia_aplic.h | 58 ++
arch/riscv/include/asm/kvm_aia_imsic.h | 38 +
arch/riscv/include/asm/kvm_host.h | 4 +
arch/riscv/include/uapi/asm/kvm.h | 72 ++
arch/riscv/kvm/Kconfig | 4 +
arch/riscv/kvm/Makefile | 3 +
arch/riscv/kvm/aia.c | 274 +++++-
arch/riscv/kvm/aia_aplic.c | 617 ++++++++++++++
arch/riscv/kvm/aia_device.c | 672 +++++++++++++++
arch/riscv/kvm/aia_imsic.c | 1083 ++++++++++++++++++++++++
arch/riscv/kvm/main.c | 3 +-
arch/riscv/kvm/vcpu.c | 2 +
arch/riscv/kvm/vm.c | 115 +++
include/uapi/linux/kvm.h | 2 +
15 files changed, 3021 insertions(+), 33 deletions(-)
create mode 100644 arch/riscv/include/asm/kvm_aia_aplic.h
create mode 100644 arch/riscv/include/asm/kvm_aia_imsic.h
create mode 100644 arch/riscv/kvm/aia_aplic.c
create mode 100644 arch/riscv/kvm/aia_device.c
create mode 100644 arch/riscv/kvm/aia_imsic.c

--
2.34.1



2023-06-12 06:07:38

by Anup Patel

[permalink] [raw]
Subject: [PATCH v2 01/10] RISC-V: KVM: Implement guest external interrupt line management

The RISC-V host will have one guest external interrupt line for each
VS-level IMSICs associated with a HART. The guest external interrupt
lines are per-HART resources and hypervisor can use HGEIE, HGEIP, and
HIE CSRs to manage these guest external interrupt lines.

Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Andrew Jones <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/kvm_aia.h | 10 ++
arch/riscv/kvm/aia.c | 244 +++++++++++++++++++++++++++++++
arch/riscv/kvm/main.c | 3 +-
arch/riscv/kvm/vcpu.c | 2 +
4 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 1de0717112e5..0938e0cadf80 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -44,10 +44,15 @@ struct kvm_vcpu_aia {

#define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel)

+extern unsigned int kvm_riscv_aia_nr_hgei;
DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
#define kvm_riscv_aia_available() \
static_branch_unlikely(&kvm_riscv_aia_available)

+static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
+{
+}
+
#define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1)
static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
unsigned long isel,
@@ -119,6 +124,11 @@ static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
{
}

+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+ void __iomem **hgei_va, phys_addr_t *hgei_pa);
+void kvm_riscv_aia_free_hgei(int cpu, int hgei);
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable);
+
void kvm_riscv_aia_enable(void);
void kvm_riscv_aia_disable(void);
int kvm_riscv_aia_init(void);
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 4f1286fc7f17..1cee75a8c883 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -8,11 +8,47 @@
*/

#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
#include <asm/hwcap.h>

+struct aia_hgei_control {
+ raw_spinlock_t lock;
+ unsigned long free_bitmap;
+ struct kvm_vcpu *owners[BITS_PER_LONG];
+};
+static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
+static int hgei_parent_irq;
+
+unsigned int kvm_riscv_aia_nr_hgei;
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);

+static int aia_find_hgei(struct kvm_vcpu *owner)
+{
+ int i, hgei;
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ hgei = -1;
+ for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
+ if (hgctrl->owners[i] == owner) {
+ hgei = i;
+ break;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
+ return hgei;
+}
+
static void aia_set_hvictl(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -56,6 +92,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)

bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
+ int hgei;
unsigned long seip;

if (!kvm_riscv_aia_available())
@@ -74,6 +111,10 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;

+ hgei = aia_find_hgei(vcpu);
+ if (hgei > 0)
+ return !!(csr_read(CSR_HGEIP) & BIT(hgei));
+
return false;
}

@@ -348,6 +389,143 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
return KVM_INSN_EXIT_TO_USER_SPACE;
}

+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+ void __iomem **hgei_va, phys_addr_t *hgei_pa)
+{
+ int ret = -ENOENT;
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+ if (!kvm_riscv_aia_available() || !hgctrl)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ if (hgctrl->free_bitmap) {
+ ret = __ffs(hgctrl->free_bitmap);
+ hgctrl->free_bitmap &= ~BIT(ret);
+ hgctrl->owners[ret] = owner;
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ /* TODO: To be updated later by AIA in-kernel irqchip support */
+ if (hgei_va)
+ *hgei_va = NULL;
+ if (hgei_pa)
+ *hgei_pa = 0;
+
+ return ret;
+}
+
+void kvm_riscv_aia_free_hgei(int cpu, int hgei)
+{
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+ if (!kvm_riscv_aia_available() || !hgctrl)
+ return;
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ if (hgei > 0 && hgei <= kvm_riscv_aia_nr_hgei) {
+ if (!(hgctrl->free_bitmap & BIT(hgei))) {
+ hgctrl->free_bitmap |= BIT(hgei);
+ hgctrl->owners[hgei] = NULL;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+}
+
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
+{
+ int hgei;
+
+ if (!kvm_riscv_aia_available())
+ return;
+
+ hgei = aia_find_hgei(owner);
+ if (hgei > 0) {
+ if (enable)
+ csr_set(CSR_HGEIE, BIT(hgei));
+ else
+ csr_clear(CSR_HGEIE, BIT(hgei));
+ }
+}
+
+static irqreturn_t hgei_interrupt(int irq, void *dev_id)
+{
+ int i;
+ unsigned long hgei_mask, flags;
+ struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+ hgei_mask = csr_read(CSR_HGEIP) & csr_read(CSR_HGEIE);
+ csr_clear(CSR_HGEIE, hgei_mask);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ for_each_set_bit(i, &hgei_mask, BITS_PER_LONG) {
+ if (hgctrl->owners[i])
+ kvm_vcpu_kick(hgctrl->owners[i]);
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
+ return IRQ_HANDLED;
+}
+
+static int aia_hgei_init(void)
+{
+ int cpu, rc;
+ struct irq_domain *domain;
+ struct aia_hgei_control *hgctrl;
+
+ /* Initialize per-CPU guest external interrupt line management */
+ for_each_possible_cpu(cpu) {
+ hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+ raw_spin_lock_init(&hgctrl->lock);
+ if (kvm_riscv_aia_nr_hgei) {
+ hgctrl->free_bitmap =
+ BIT(kvm_riscv_aia_nr_hgei + 1) - 1;
+ hgctrl->free_bitmap &= ~BIT(0);
+ } else
+ hgctrl->free_bitmap = 0;
+ }
+
+ /* Find INTC irq domain */
+ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
+ DOMAIN_BUS_ANY);
+ if (!domain) {
+ kvm_err("unable to find INTC domain\n");
+ return -ENOENT;
+ }
+
+ /* Map per-CPU SGEI interrupt from INTC domain */
+ hgei_parent_irq = irq_create_mapping(domain, IRQ_S_GEXT);
+ if (!hgei_parent_irq) {
+ kvm_err("unable to map SGEI IRQ\n");
+ return -ENOMEM;
+ }
+
+ /* Request per-CPU SGEI interrupt */
+ rc = request_percpu_irq(hgei_parent_irq, hgei_interrupt,
+ "riscv-kvm", &aia_hgei);
+ if (rc) {
+ kvm_err("failed to request SGEI IRQ\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void aia_hgei_exit(void)
+{
+ /* Free per-CPU SGEI interrupt */
+ free_percpu_irq(hgei_parent_irq, &aia_hgei);
+}
+
void kvm_riscv_aia_enable(void)
{
if (!kvm_riscv_aia_available())
@@ -362,21 +540,82 @@ void kvm_riscv_aia_enable(void)
csr_write(CSR_HVIPRIO1H, 0x0);
csr_write(CSR_HVIPRIO2H, 0x0);
#endif
+
+ /* Enable per-CPU SGEI interrupt */
+ enable_percpu_irq(hgei_parent_irq,
+ irq_get_trigger_type(hgei_parent_irq));
+ csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
}

void kvm_riscv_aia_disable(void)
{
+ int i;
+ unsigned long flags;
+ struct kvm_vcpu *vcpu;
+ struct aia_hgei_control *hgctrl;
+
if (!kvm_riscv_aia_available())
return;
+ hgctrl = get_cpu_ptr(&aia_hgei);
+
+ /* Disable per-CPU SGEI interrupt */
+ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
+ disable_percpu_irq(hgei_parent_irq);

aia_set_hvictl(false);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ for (i = 0; i <= kvm_riscv_aia_nr_hgei; i++) {
+ vcpu = hgctrl->owners[i];
+ if (!vcpu)
+ continue;
+
+ /*
+ * We release hgctrl->lock before notifying IMSIC
+ * so that we don't have lock ordering issues.
+ */
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ /* Notify IMSIC */
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ /*
+ * Wakeup VCPU if it was blocked so that it can
+ * run on other HARTs
+ */
+ if (csr_read(CSR_HGEIE) & BIT(i)) {
+ csr_clear(CSR_HGEIE, BIT(i));
+ kvm_vcpu_kick(vcpu);
+ }
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
}

int kvm_riscv_aia_init(void)
{
+ int rc;
+
if (!riscv_isa_extension_available(NULL, SxAIA))
return -ENODEV;

+ /* Figure-out number of bits in HGEIE */
+ csr_write(CSR_HGEIE, -1UL);
+ kvm_riscv_aia_nr_hgei = fls_long(csr_read(CSR_HGEIE));
+ csr_write(CSR_HGEIE, 0);
+ if (kvm_riscv_aia_nr_hgei)
+ kvm_riscv_aia_nr_hgei--;
+
+ /* Initialize guest external interrupt line management */
+ rc = aia_hgei_init();
+ if (rc)
+ return rc;
+
/* Enable KVM AIA support */
static_branch_enable(&kvm_riscv_aia_available);

@@ -385,4 +624,9 @@ int kvm_riscv_aia_init(void)

void kvm_riscv_aia_exit(void)
{
+ if (!kvm_riscv_aia_available())
+ return;
+
+ /* Cleanup the HGEI state */
+ aia_hgei_exit();
}
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index a7112d583637..48ae0d4b3932 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -116,7 +116,8 @@ static int __init riscv_kvm_init(void)
kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());

if (kvm_riscv_aia_available())
- kvm_info("AIA available\n");
+ kvm_info("AIA available with %d guest external interrupts\n",
+ kvm_riscv_aia_nr_hgei);

rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (rc) {
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 8bd9f2a8a0b9..2db62c6c0d3e 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -250,10 +250,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
+ kvm_riscv_aia_wakeon_hgei(vcpu, true);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
+ kvm_riscv_aia_wakeon_hgei(vcpu, false);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
--
2.34.1


2023-06-12 06:08:27

by Anup Patel

[permalink] [raw]
Subject: [PATCH v2 07/10] RISC-V: KVM: Add in-kernel emulation of AIA APLIC

There is no virtualization support in AIA APLIC so we add in-kernel
emulation of AIA APLIC which only supports MSI-mode (i.e. wired
interrupts forwarded to AIA IMSIC as MSIs).

Signed-off-by: Anup Patel <[email protected]>
---
arch/riscv/include/asm/kvm_aia.h | 17 +-
arch/riscv/kvm/Makefile | 1 +
arch/riscv/kvm/aia_aplic.c | 574 +++++++++++++++++++++++++++++++
3 files changed, 578 insertions(+), 14 deletions(-)
create mode 100644 arch/riscv/kvm/aia_aplic.c

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index a1281ebc9b92..f6bd8523395f 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -129,20 +129,9 @@ static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
{
}

-static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
- u32 source, bool level)
-{
- return 0;
-}
-
-static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
-{
-}
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level);
+int kvm_riscv_aia_aplic_init(struct kvm *kvm);
+void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm);

#ifdef CONFIG_32BIT
void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index dd69ebe098bd..94c43702c765 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -28,3 +28,4 @@ kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
kvm-y += aia_device.o
+kvm-y += aia_aplic.o
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
new file mode 100644
index 000000000000..1b0a4df64815
--- /dev/null
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -0,0 +1,574 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <[email protected]>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/kvm_aia_aplic.h>
+
+struct aplic_irq {
+ raw_spinlock_t lock;
+ u32 sourcecfg;
+ u32 state;
+#define APLIC_IRQ_STATE_PENDING BIT(0)
+#define APLIC_IRQ_STATE_ENABLED BIT(1)
+#define APLIC_IRQ_STATE_ENPEND (APLIC_IRQ_STATE_PENDING | \
+ APLIC_IRQ_STATE_ENABLED)
+#define APLIC_IRQ_STATE_INPUT BIT(8)
+ u32 target;
+};
+
+struct aplic {
+ struct kvm_io_device iodev;
+
+ u32 domaincfg;
+ u32 genmsi;
+
+ u32 nr_irqs;
+ u32 nr_words;
+ struct aplic_irq *irqs;
+};
+
+static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq)
+{
+ u32 ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return 0;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = irqd->sourcecfg;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ if (val & APLIC_SOURCECFG_D)
+ val = 0;
+ else
+ val &= APLIC_SOURCECFG_SM_MASK;
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ irqd->sourcecfg = val;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static u32 aplic_read_target(struct aplic *aplic, u32 irq)
+{
+ u32 ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return 0;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = irqd->target;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ val &= APLIC_TARGET_EIID_MASK |
+ (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) |
+ (APLIC_TARGET_GUEST_IDX_MASK << APLIC_TARGET_GUEST_IDX_SHIFT);
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ irqd->target = val;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_pending(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
+{
+ unsigned long flags, sm;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (!pending &&
+ ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
+ (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ goto skip_write_pending;
+
+ if (pending)
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+
+skip_write_pending:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_enabled(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ if (enabled)
+ irqd->state |= APLIC_IRQ_STATE_ENABLED;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_ENABLED;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_input(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_inject_msi(struct kvm *kvm, u32 irq, u32 target)
+{
+ u32 hart_idx, guest_idx, eiid;
+
+ hart_idx = target >> APLIC_TARGET_HART_IDX_SHIFT;
+ hart_idx &= APLIC_TARGET_HART_IDX_MASK;
+ guest_idx = target >> APLIC_TARGET_GUEST_IDX_SHIFT;
+ guest_idx &= APLIC_TARGET_GUEST_IDX_MASK;
+ eiid = target & APLIC_TARGET_EIID_MASK;
+ kvm_riscv_aia_inject_msi_by_id(kvm, hart_idx, guest_idx, eiid);
+}
+
+static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last)
+{
+ bool inject;
+ u32 irq, target;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!(aplic->domaincfg & APLIC_DOMAINCFG_IE))
+ return;
+
+ for (irq = first; irq <= last; irq++) {
+ if (!irq || aplic->nr_irqs <= irq)
+ continue;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ inject = false;
+ target = irqd->target;
+ if (irqd->state & APLIC_IRQ_STATE_ENPEND) {
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+ inject = true;
+ }
+
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ if (inject)
+ aplic_inject_msi(kvm, irq, target);
+ }
+}
+
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level)
+{
+ u32 target;
+ bool inject = false, ie;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!aplic || !source || (aplic->nr_irqs <= source))
+ return -ENODEV;
+ irqd = &aplic->irqs[source];
+ ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false;
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ if (irqd->sourcecfg & APLIC_SOURCECFG_D)
+ goto skip_unlock;
+
+ switch (irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK) {
+ case APLIC_SOURCECFG_SM_EDGE_RISE:
+ if (level && !(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_EDGE_FALL:
+ if (!level && (irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_LEVEL_HIGH:
+ if (level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_LEVEL_LOW:
+ if (!level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ }
+
+ if (level)
+ irqd->state |= APLIC_IRQ_STATE_INPUT;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_INPUT;
+
+ target = irqd->target;
+ if (ie && (irqd->state & APLIC_IRQ_STATE_ENPEND)) {
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+ inject = true;
+ }
+
+skip_unlock:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ if (inject)
+ aplic_inject_msi(kvm, source, target);
+
+ return 0;
+}
+
+static u32 aplic_read_input_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_input(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static u32 aplic_read_pending_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_pending(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static void aplic_write_pending_word(struct aplic *aplic, u32 word,
+ u32 val, bool pending)
+{
+ u32 i;
+
+ for (i = 0; i < 32; i++) {
+ if (val & BIT(i))
+ aplic_write_pending(aplic, word * 32 + i, pending);
+ }
+}
+
+static u32 aplic_read_enabled_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_enabled(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static void aplic_write_enabled_word(struct aplic *aplic, u32 word,
+ u32 val, bool enabled)
+{
+ u32 i;
+
+ for (i = 0; i < 32; i++) {
+ if (val & BIT(i))
+ aplic_write_enabled(aplic, word * 32 + i, enabled);
+ }
+}
+
+static int aplic_mmio_read_offset(struct kvm *kvm, gpa_t off, u32 *val32)
+{
+ u32 i;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if ((off & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ if (off == APLIC_DOMAINCFG) {
+ *val32 = APLIC_DOMAINCFG_RDONLY |
+ aplic->domaincfg | APLIC_DOMAINCFG_DM;
+ } else if ((off >= APLIC_SOURCECFG_BASE) &&
+ (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+ *val32 = aplic_read_sourcecfg(aplic, i);
+ } else if ((off >= APLIC_SETIP_BASE) &&
+ (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIP_BASE) >> 2;
+ *val32 = aplic_read_pending_word(aplic, i);
+ } else if (off == APLIC_SETIPNUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_CLRIP_BASE) &&
+ (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIP_BASE) >> 2;
+ *val32 = aplic_read_input_word(aplic, i);
+ } else if (off == APLIC_CLRIPNUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_SETIE_BASE) &&
+ (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIE_BASE) >> 2;
+ *val32 = aplic_read_enabled_word(aplic, i);
+ } else if (off == APLIC_SETIENUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_CLRIE_BASE) &&
+ (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+ *val32 = 0;
+ } else if (off == APLIC_CLRIENUM) {
+ *val32 = 0;
+ } else if (off == APLIC_SETIPNUM_LE) {
+ *val32 = 0;
+ } else if (off == APLIC_SETIPNUM_BE) {
+ *val32 = 0;
+ } else if (off == APLIC_GENMSI) {
+ *val32 = aplic->genmsi;
+ } else if ((off >= APLIC_TARGET_BASE) &&
+ (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+ *val32 = aplic_read_target(aplic, i);
+ } else
+ return -ENODEV;
+
+ return 0;
+}
+
+static int aplic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ if (len != 4)
+ return -EOPNOTSUPP;
+
+ return aplic_mmio_read_offset(vcpu->kvm,
+ addr - vcpu->kvm->arch.aia.aplic_addr,
+ val);
+}
+
+static int aplic_mmio_write_offset(struct kvm *kvm, gpa_t off, u32 val32)
+{
+ u32 i;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if ((off & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ if (off == APLIC_DOMAINCFG) {
+ /* Only IE bit writeable */
+ aplic->domaincfg = val32 & APLIC_DOMAINCFG_IE;
+ } else if ((off >= APLIC_SOURCECFG_BASE) &&
+ (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+ aplic_write_sourcecfg(aplic, i, val32);
+ } else if ((off >= APLIC_SETIP_BASE) &&
+ (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIP_BASE) >> 2;
+ aplic_write_pending_word(aplic, i, val32, true);
+ } else if (off == APLIC_SETIPNUM) {
+ aplic_write_pending(aplic, val32, true);
+ } else if ((off >= APLIC_CLRIP_BASE) &&
+ (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIP_BASE) >> 2;
+ aplic_write_pending_word(aplic, i, val32, false);
+ } else if (off == APLIC_CLRIPNUM) {
+ aplic_write_pending(aplic, val32, false);
+ } else if ((off >= APLIC_SETIE_BASE) &&
+ (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIE_BASE) >> 2;
+ aplic_write_enabled_word(aplic, i, val32, true);
+ } else if (off == APLIC_SETIENUM) {
+ aplic_write_enabled(aplic, val32, true);
+ } else if ((off >= APLIC_CLRIE_BASE) &&
+ (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIE_BASE) >> 2;
+ aplic_write_enabled_word(aplic, i, val32, false);
+ } else if (off == APLIC_CLRIENUM) {
+ aplic_write_enabled(aplic, val32, false);
+ } else if (off == APLIC_SETIPNUM_LE) {
+ aplic_write_pending(aplic, val32, true);
+ } else if (off == APLIC_SETIPNUM_BE) {
+ aplic_write_pending(aplic, __swab32(val32), true);
+ } else if (off == APLIC_GENMSI) {
+ aplic->genmsi = val32 & ~(APLIC_TARGET_GUEST_IDX_MASK <<
+ APLIC_TARGET_GUEST_IDX_SHIFT);
+ kvm_riscv_aia_inject_msi_by_id(kvm,
+ val32 >> APLIC_TARGET_HART_IDX_SHIFT, 0,
+ val32 & APLIC_TARGET_EIID_MASK);
+ } else if ((off >= APLIC_TARGET_BASE) &&
+ (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+ aplic_write_target(aplic, i, val32);
+ } else
+ return -ENODEV;
+
+ aplic_update_irq_range(kvm, 1, aplic->nr_irqs - 1);
+
+ return 0;
+}
+
+static int aplic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ if (len != 4)
+ return -EOPNOTSUPP;
+
+ return aplic_mmio_write_offset(vcpu->kvm,
+ addr - vcpu->kvm->arch.aia.aplic_addr,
+ *((const u32 *)val));
+}
+
+static struct kvm_io_device_ops aplic_iodoev_ops = {
+ .read = aplic_mmio_read,
+ .write = aplic_mmio_write,
+};
+
+int kvm_riscv_aia_aplic_init(struct kvm *kvm)
+{
+ int i, ret = 0;
+ struct aplic *aplic;
+
+ /* Do nothing if we have zero sources */
+ if (!kvm->arch.aia.nr_sources)
+ return 0;
+
+ /* Allocate APLIC global state */
+ aplic = kzalloc(sizeof(*aplic), GFP_KERNEL);
+ if (!aplic)
+ return -ENOMEM;
+ kvm->arch.aia.aplic_state = aplic;
+
+ /* Setup APLIC IRQs */
+ aplic->nr_irqs = kvm->arch.aia.nr_sources + 1;
+ aplic->nr_words = DIV_ROUND_UP(aplic->nr_irqs, 32);
+ aplic->irqs = kcalloc(aplic->nr_irqs,
+ sizeof(*aplic->irqs), GFP_KERNEL);
+ if (!aplic->irqs) {
+ ret = -ENOMEM;
+ goto fail_free_aplic;
+ }
+ for (i = 0; i < aplic->nr_irqs; i++)
+ raw_spin_lock_init(&aplic->irqs[i].lock);
+
+ /* Setup IO device */
+ kvm_iodevice_init(&aplic->iodev, &aplic_iodoev_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ kvm->arch.aia.aplic_addr,
+ KVM_DEV_RISCV_APLIC_SIZE,
+ &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret)
+ goto fail_free_aplic_irqs;
+
+ /* Setup default IRQ routing */
+ ret = kvm_riscv_setup_default_irq_routing(kvm, aplic->nr_irqs);
+ if (ret)
+ goto fail_unreg_iodev;
+
+ return 0;
+
+fail_unreg_iodev:
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+fail_free_aplic_irqs:
+ kfree(aplic->irqs);
+fail_free_aplic:
+ kvm->arch.aia.aplic_state = NULL;
+ kfree(aplic);
+ return ret;
+}
+
+void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
+{
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!aplic)
+ return;
+
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+
+ kfree(aplic->irqs);
+
+ kvm->arch.aia.aplic_state = NULL;
+ kfree(aplic);
+}
--
2.34.1


2023-06-12 06:10:33

by Anup Patel

[permalink] [raw]
Subject: [PATCH v2 02/10] RISC-V: KVM: Add IMSIC related defines

We add IMSIC related defines in a separate header so that different
parts of KVM code can share it. Once AIA drivers are merged will
have a common IMSIC header shared by both KVM and IRQCHIP driver.

Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/kvm_aia_imsic.h | 38 ++++++++++++++++++++++++++
arch/riscv/kvm/aia.c | 3 +-
2 files changed, 39 insertions(+), 2 deletions(-)
create mode 100644 arch/riscv/include/asm/kvm_aia_imsic.h

diff --git a/arch/riscv/include/asm/kvm_aia_imsic.h b/arch/riscv/include/asm/kvm_aia_imsic.h
new file mode 100644
index 000000000000..da5881d2bde0
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_aia_imsic.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+#ifndef __KVM_RISCV_AIA_IMSIC_H
+#define __KVM_RISCV_AIA_IMSIC_H
+
+#include <linux/types.h>
+#include <asm/csr.h>
+
+#define IMSIC_MMIO_PAGE_SHIFT 12
+#define IMSIC_MMIO_PAGE_SZ (1UL << IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_PAGE_LE 0x00
+#define IMSIC_MMIO_PAGE_BE 0x04
+
+#define IMSIC_MIN_ID 63
+#define IMSIC_MAX_ID 2048
+
+#define IMSIC_EIDELIVERY 0x70
+
+#define IMSIC_EITHRESHOLD 0x72
+
+#define IMSIC_EIP0 0x80
+#define IMSIC_EIP63 0xbf
+#define IMSIC_EIPx_BITS 32
+
+#define IMSIC_EIE0 0xc0
+#define IMSIC_EIE63 0xff
+#define IMSIC_EIEx_BITS 32
+
+#define IMSIC_FIRST IMSIC_EIDELIVERY
+#define IMSIC_LAST IMSIC_EIE63
+
+#define IMSIC_MMIO_SETIPNUM_LE 0x00
+#define IMSIC_MMIO_SETIPNUM_BE 0x04
+
+#endif
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 1cee75a8c883..c78c06d99e39 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -15,6 +15,7 @@
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <asm/hwcap.h>
+#include <asm/kvm_aia_imsic.h>

struct aia_hgei_control {
raw_spinlock_t lock;
@@ -364,8 +365,6 @@ static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel,
return KVM_INSN_CONTINUE_NEXT_SEPC;
}

-#define IMSIC_FIRST 0x70
-#define IMSIC_LAST 0xff
int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask)
--
2.34.1


2023-06-12 06:13:34

by Anup Patel

[permalink] [raw]
Subject: [PATCH v2 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC

We can have AIA IMSIC support for both HS-level and VS-level but
the VS-level IMSICs are optional. We use the VS-level IMSICs for
Guest/VM whenever available otherwise we fallback to software
emulation of AIA IMSIC.

This patch adds in-kernel virtualization of AIA IMSIC.

Signed-off-by: Anup Patel <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/kvm_aia.h | 46 +-
arch/riscv/kvm/Makefile | 1 +
arch/riscv/kvm/aia_imsic.c | 913 +++++++++++++++++++++++++++++++
3 files changed, 924 insertions(+), 36 deletions(-)
create mode 100644 arch/riscv/kvm/aia_imsic.c

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index ba939c0054aa..a4f6ebf90e31 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -90,44 +90,18 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);

extern struct kvm_device_ops kvm_riscv_aia_device_ops;

-static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
-{
- return 1;
-}
+void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);

#define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1)
-static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
- unsigned long isel,
- unsigned long *val,
- unsigned long new_val,
- unsigned long wr_mask)
-{
- return 0;
-}
-
-static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
- u32 guest_index, u32 offset,
- u32 iid)
-{
- return 0;
-}
-
-static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
-static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
-{
-}
+int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask);
+void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+ u32 guest_index, u32 offset, u32 iid);
+int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu);

int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v);
int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v);
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 94c43702c765..c1d1356387ff 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -29,3 +29,4 @@ kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
kvm-y += aia_device.o
kvm-y += aia_aplic.o
+kvm-y += aia_imsic.o
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
new file mode 100644
index 000000000000..2dc09dcb8ab5
--- /dev/null
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -0,0 +1,913 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <[email protected]>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/csr.h>
+#include <asm/kvm_aia_imsic.h>
+
+#define IMSIC_MAX_EIX (IMSIC_MAX_ID / BITS_PER_TYPE(u64))
+
+struct imsic_mrif_eix {
+ unsigned long eip[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+ unsigned long eie[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+};
+
+struct imsic_mrif {
+ struct imsic_mrif_eix eix[IMSIC_MAX_EIX];
+ unsigned long eithreshold;
+ unsigned long eidelivery;
+};
+
+struct imsic {
+ struct kvm_io_device iodev;
+
+ u32 nr_msis;
+ u32 nr_eix;
+ u32 nr_hw_eix;
+
+ /*
+ * At any point in time, the register state is in
+ * one of the following places:
+ *
+ * 1) Hardware: IMSIC VS-file (vsfile_cpu >= 0)
+ * 2) Software: IMSIC SW-file (vsfile_cpu < 0)
+ */
+
+ /* IMSIC VS-file */
+ rwlock_t vsfile_lock;
+ int vsfile_cpu;
+ int vsfile_hgei;
+ void __iomem *vsfile_va;
+ phys_addr_t vsfile_pa;
+
+ /* IMSIC SW-file */
+ struct imsic_mrif *swfile;
+ phys_addr_t swfile_pa;
+};
+
+#define imsic_vs_csr_read(__c) \
+({ \
+ unsigned long __r; \
+ csr_write(CSR_VSISELECT, __c); \
+ __r = csr_read(CSR_VSIREG); \
+ __r; \
+})
+
+#define imsic_read_switchcase(__ireg) \
+ case __ireg: \
+ return imsic_vs_csr_read(__ireg);
+#define imsic_read_switchcase_2(__ireg) \
+ imsic_read_switchcase(__ireg + 0) \
+ imsic_read_switchcase(__ireg + 1)
+#define imsic_read_switchcase_4(__ireg) \
+ imsic_read_switchcase_2(__ireg + 0) \
+ imsic_read_switchcase_2(__ireg + 2)
+#define imsic_read_switchcase_8(__ireg) \
+ imsic_read_switchcase_4(__ireg + 0) \
+ imsic_read_switchcase_4(__ireg + 4)
+#define imsic_read_switchcase_16(__ireg) \
+ imsic_read_switchcase_8(__ireg + 0) \
+ imsic_read_switchcase_8(__ireg + 8)
+#define imsic_read_switchcase_32(__ireg) \
+ imsic_read_switchcase_16(__ireg + 0) \
+ imsic_read_switchcase_16(__ireg + 16)
+#define imsic_read_switchcase_64(__ireg) \
+ imsic_read_switchcase_32(__ireg + 0) \
+ imsic_read_switchcase_32(__ireg + 32)
+
+static unsigned long imsic_eix_read(int ireg)
+{
+ switch (ireg) {
+ imsic_read_switchcase_64(IMSIC_EIP0)
+ imsic_read_switchcase_64(IMSIC_EIE0)
+ };
+
+ return 0;
+}
+
+#define imsic_vs_csr_swap(__c, __v) \
+({ \
+ unsigned long __r; \
+ csr_write(CSR_VSISELECT, __c); \
+ __r = csr_swap(CSR_VSIREG, __v); \
+ __r; \
+})
+
+#define imsic_swap_switchcase(__ireg, __v) \
+ case __ireg: \
+ return imsic_vs_csr_swap(__ireg, __v);
+#define imsic_swap_switchcase_2(__ireg, __v) \
+ imsic_swap_switchcase(__ireg + 0, __v) \
+ imsic_swap_switchcase(__ireg + 1, __v)
+#define imsic_swap_switchcase_4(__ireg, __v) \
+ imsic_swap_switchcase_2(__ireg + 0, __v) \
+ imsic_swap_switchcase_2(__ireg + 2, __v)
+#define imsic_swap_switchcase_8(__ireg, __v) \
+ imsic_swap_switchcase_4(__ireg + 0, __v) \
+ imsic_swap_switchcase_4(__ireg + 4, __v)
+#define imsic_swap_switchcase_16(__ireg, __v) \
+ imsic_swap_switchcase_8(__ireg + 0, __v) \
+ imsic_swap_switchcase_8(__ireg + 8, __v)
+#define imsic_swap_switchcase_32(__ireg, __v) \
+ imsic_swap_switchcase_16(__ireg + 0, __v) \
+ imsic_swap_switchcase_16(__ireg + 16, __v)
+#define imsic_swap_switchcase_64(__ireg, __v) \
+ imsic_swap_switchcase_32(__ireg + 0, __v) \
+ imsic_swap_switchcase_32(__ireg + 32, __v)
+
+static unsigned long imsic_eix_swap(int ireg, unsigned long val)
+{
+ switch (ireg) {
+ imsic_swap_switchcase_64(IMSIC_EIP0, val)
+ imsic_swap_switchcase_64(IMSIC_EIE0, val)
+ };
+
+ return 0;
+}
+
+#define imsic_vs_csr_write(__c, __v) \
+do { \
+ csr_write(CSR_VSISELECT, __c); \
+ csr_write(CSR_VSIREG, __v); \
+} while (0)
+
+#define imsic_write_switchcase(__ireg, __v) \
+ case __ireg: \
+ imsic_vs_csr_write(__ireg, __v); \
+ break;
+#define imsic_write_switchcase_2(__ireg, __v) \
+ imsic_write_switchcase(__ireg + 0, __v) \
+ imsic_write_switchcase(__ireg + 1, __v)
+#define imsic_write_switchcase_4(__ireg, __v) \
+ imsic_write_switchcase_2(__ireg + 0, __v) \
+ imsic_write_switchcase_2(__ireg + 2, __v)
+#define imsic_write_switchcase_8(__ireg, __v) \
+ imsic_write_switchcase_4(__ireg + 0, __v) \
+ imsic_write_switchcase_4(__ireg + 4, __v)
+#define imsic_write_switchcase_16(__ireg, __v) \
+ imsic_write_switchcase_8(__ireg + 0, __v) \
+ imsic_write_switchcase_8(__ireg + 8, __v)
+#define imsic_write_switchcase_32(__ireg, __v) \
+ imsic_write_switchcase_16(__ireg + 0, __v) \
+ imsic_write_switchcase_16(__ireg + 16, __v)
+#define imsic_write_switchcase_64(__ireg, __v) \
+ imsic_write_switchcase_32(__ireg + 0, __v) \
+ imsic_write_switchcase_32(__ireg + 32, __v)
+
+static void imsic_eix_write(int ireg, unsigned long val)
+{
+ switch (ireg) {
+ imsic_write_switchcase_64(IMSIC_EIP0, val)
+ imsic_write_switchcase_64(IMSIC_EIE0, val)
+ };
+}
+
+#define imsic_vs_csr_set(__c, __v) \
+do { \
+ csr_write(CSR_VSISELECT, __c); \
+ csr_set(CSR_VSIREG, __v); \
+} while (0)
+
+#define imsic_set_switchcase(__ireg, __v) \
+ case __ireg: \
+ imsic_vs_csr_set(__ireg, __v); \
+ break;
+#define imsic_set_switchcase_2(__ireg, __v) \
+ imsic_set_switchcase(__ireg + 0, __v) \
+ imsic_set_switchcase(__ireg + 1, __v)
+#define imsic_set_switchcase_4(__ireg, __v) \
+ imsic_set_switchcase_2(__ireg + 0, __v) \
+ imsic_set_switchcase_2(__ireg + 2, __v)
+#define imsic_set_switchcase_8(__ireg, __v) \
+ imsic_set_switchcase_4(__ireg + 0, __v) \
+ imsic_set_switchcase_4(__ireg + 4, __v)
+#define imsic_set_switchcase_16(__ireg, __v) \
+ imsic_set_switchcase_8(__ireg + 0, __v) \
+ imsic_set_switchcase_8(__ireg + 8, __v)
+#define imsic_set_switchcase_32(__ireg, __v) \
+ imsic_set_switchcase_16(__ireg + 0, __v) \
+ imsic_set_switchcase_16(__ireg + 16, __v)
+#define imsic_set_switchcase_64(__ireg, __v) \
+ imsic_set_switchcase_32(__ireg + 0, __v) \
+ imsic_set_switchcase_32(__ireg + 32, __v)
+
+static void imsic_eix_set(int ireg, unsigned long val)
+{
+ switch (ireg) {
+ imsic_set_switchcase_64(IMSIC_EIP0, val)
+ imsic_set_switchcase_64(IMSIC_EIE0, val)
+ };
+}
+
+static unsigned long imsic_mrif_atomic_rmw(struct imsic_mrif *mrif,
+ unsigned long *ptr,
+ unsigned long new_val,
+ unsigned long wr_mask)
+{
+ unsigned long old_val = 0, tmp = 0;
+
+ __asm__ __volatile__ (
+ "0: lr.w.aq %1, %0\n"
+ " and %2, %1, %3\n"
+ " or %2, %2, %4\n"
+ " sc.w.rl %2, %2, %0\n"
+ " bnez %2, 0b"
+ : "+A" (*ptr), "+r" (old_val), "+r" (tmp)
+ : "r" (~wr_mask), "r" (new_val & wr_mask)
+ : "memory");
+
+ return old_val;
+}
+
+static unsigned long imsic_mrif_atomic_or(struct imsic_mrif *mrif,
+ unsigned long *ptr,
+ unsigned long val)
+{
+ return arch_atomic_long_fetch_or(val, (atomic_long_t *)ptr);
+}
+
+#define imsic_mrif_atomic_write(__mrif, __ptr, __new_val) \
+ imsic_mrif_atomic_rmw(__mrif, __ptr, __new_val, -1UL)
+#define imsic_mrif_atomic_read(__mrif, __ptr) \
+ imsic_mrif_atomic_or(__mrif, __ptr, 0)
+
+static u32 imsic_mrif_topei(struct imsic_mrif *mrif, u32 nr_eix, u32 nr_msis)
+{
+ struct imsic_mrif_eix *eix;
+ u32 i, imin, imax, ei, max_msi;
+ unsigned long eipend[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+ unsigned long eithreshold = imsic_mrif_atomic_read(mrif,
+ &mrif->eithreshold);
+
+ max_msi = (eithreshold && (eithreshold <= nr_msis)) ?
+ eithreshold : nr_msis;
+ for (ei = 0; ei < nr_eix; ei++) {
+ eix = &mrif->eix[ei];
+ eipend[0] = imsic_mrif_atomic_read(mrif, &eix->eie[0]) &
+ imsic_mrif_atomic_read(mrif, &eix->eip[0]);
+#ifdef CONFIG_32BIT
+ eipend[1] = imsic_mrif_atomic_read(mrif, &eix->eie[1]) &
+ imsic_mrif_atomic_read(mrif, &eix->eip[1]);
+ if (!eipend[0] && !eipend[1])
+#else
+ if (!eipend[0])
+#endif
+ continue;
+
+ imin = ei * BITS_PER_TYPE(u64);
+ imax = ((imin + BITS_PER_TYPE(u64)) < max_msi) ?
+ imin + BITS_PER_TYPE(u64) : max_msi;
+ for (i = (!imin) ? 1 : imin; i < imax; i++) {
+ if (test_bit(i - imin, eipend))
+ return (i << TOPEI_ID_SHIFT) | i;
+ }
+ }
+
+ return 0;
+}
+
+static int imsic_mrif_rmw(struct imsic_mrif *mrif, u32 nr_eix,
+ unsigned long isel, unsigned long *val,
+ unsigned long new_val, unsigned long wr_mask)
+{
+ bool pend;
+ struct imsic_mrif_eix *eix;
+ unsigned long *ei, num, old_val = 0;
+
+ switch (isel) {
+ case IMSIC_EIDELIVERY:
+ old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eidelivery,
+ new_val, wr_mask & 0x1);
+ break;
+ case IMSIC_EITHRESHOLD:
+ old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eithreshold,
+ new_val, wr_mask & (IMSIC_MAX_ID - 1));
+ break;
+ case IMSIC_EIP0 ... IMSIC_EIP63:
+ case IMSIC_EIE0 ... IMSIC_EIE63:
+ if (isel >= IMSIC_EIP0 && isel <= IMSIC_EIP63) {
+ pend = true;
+ num = isel - IMSIC_EIP0;
+ } else {
+ pend = false;
+ num = isel - IMSIC_EIE0;
+ }
+
+ if ((num / 2) >= nr_eix)
+ return -EINVAL;
+ eix = &mrif->eix[num / 2];
+
+#ifndef CONFIG_32BIT
+ if (num & 0x1)
+ return -EINVAL;
+ ei = (pend) ? &eix->eip[0] : &eix->eie[0];
+#else
+ ei = (pend) ? &eix->eip[num & 0x1] : &eix->eie[num & 0x1];
+#endif
+
+ /* Bit0 of EIP0 or EIE0 is read-only */
+ if (!num)
+ wr_mask &= ~BIT(0);
+
+ old_val = imsic_mrif_atomic_rmw(mrif, ei, new_val, wr_mask);
+ break;
+ default:
+ return -ENOENT;
+ };
+
+ if (val)
+ *val = old_val;
+
+ return 0;
+}
+
+struct imsic_vsfile_read_data {
+ int hgei;
+ u32 nr_eix;
+ bool clear;
+ struct imsic_mrif *mrif;
+};
+
+static void imsic_vsfile_local_read(void *data)
+{
+ u32 i;
+ struct imsic_mrif_eix *eix;
+ struct imsic_vsfile_read_data *idata = data;
+ struct imsic_mrif *mrif = idata->mrif;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to store
+ * values in MRIF because imsic_vsfile_read() is always called
+ * with pointer to temporary MRIF on stack.
+ */
+
+ if (idata->clear) {
+ mrif->eidelivery = imsic_vs_csr_swap(IMSIC_EIDELIVERY, 0);
+ mrif->eithreshold = imsic_vs_csr_swap(IMSIC_EITHRESHOLD, 0);
+ for (i = 0; i < idata->nr_eix; i++) {
+ eix = &mrif->eix[i];
+ eix->eip[0] = imsic_eix_swap(IMSIC_EIP0 + i * 2, 0);
+ eix->eie[0] = imsic_eix_swap(IMSIC_EIE0 + i * 2, 0);
+#ifdef CONFIG_32BIT
+ eix->eip[1] = imsic_eix_swap(IMSIC_EIP0 + i * 2 + 1, 0);
+ eix->eie[1] = imsic_eix_swap(IMSIC_EIE0 + i * 2 + 1, 0);
+#endif
+ }
+ } else {
+ mrif->eidelivery = imsic_vs_csr_read(IMSIC_EIDELIVERY);
+ mrif->eithreshold = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
+ for (i = 0; i < idata->nr_eix; i++) {
+ eix = &mrif->eix[i];
+ eix->eip[0] = imsic_eix_read(IMSIC_EIP0 + i * 2);
+ eix->eie[0] = imsic_eix_read(IMSIC_EIE0 + i * 2);
+#ifdef CONFIG_32BIT
+ eix->eip[1] = imsic_eix_read(IMSIC_EIP0 + i * 2 + 1);
+ eix->eie[1] = imsic_eix_read(IMSIC_EIE0 + i * 2 + 1);
+#endif
+ }
+ }
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_read(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
+ bool clear, struct imsic_mrif *mrif)
+{
+ struct imsic_vsfile_read_data idata;
+
+ /* We can only read clear if we have a IMSIC VS-file */
+ if (vsfile_cpu < 0 || vsfile_hgei <= 0)
+ return;
+
+ /* We can only read clear on local CPU */
+ idata.hgei = vsfile_hgei;
+ idata.nr_eix = nr_eix;
+ idata.clear = clear;
+ idata.mrif = mrif;
+ on_each_cpu_mask(cpumask_of(vsfile_cpu),
+ imsic_vsfile_local_read, &idata, 1);
+}
+
+static void imsic_vsfile_local_clear(int vsfile_hgei, u32 nr_eix)
+{
+ u32 i;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ /* We can only zero-out if we have a IMSIC VS-file */
+ if (vsfile_hgei <= 0)
+ return;
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ imsic_vs_csr_write(IMSIC_EIDELIVERY, 0);
+ imsic_vs_csr_write(IMSIC_EITHRESHOLD, 0);
+ for (i = 0; i < nr_eix; i++) {
+ imsic_eix_write(IMSIC_EIP0 + i * 2, 0);
+ imsic_eix_write(IMSIC_EIE0 + i * 2, 0);
+#ifdef CONFIG_32BIT
+ imsic_eix_write(IMSIC_EIP0 + i * 2 + 1, 0);
+ imsic_eix_write(IMSIC_EIE0 + i * 2 + 1, 0);
+#endif
+ }
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_local_update(int vsfile_hgei, u32 nr_eix,
+ struct imsic_mrif *mrif)
+{
+ u32 i;
+ struct imsic_mrif_eix *eix;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ /* We can only update if we have a HW IMSIC context */
+ if (vsfile_hgei <= 0)
+ return;
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to read values
+ * from MRIF in this function because it is always called with
+ * pointer to temporary MRIF on stack.
+ */
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ for (i = 0; i < nr_eix; i++) {
+ eix = &mrif->eix[i];
+ imsic_eix_set(IMSIC_EIP0 + i * 2, eix->eip[0]);
+ imsic_eix_set(IMSIC_EIE0 + i * 2, eix->eie[0]);
+#ifdef CONFIG_32BIT
+ imsic_eix_set(IMSIC_EIP0 + i * 2 + 1, eix->eip[1]);
+ imsic_eix_set(IMSIC_EIE0 + i * 2 + 1, eix->eie[1]);
+#endif
+ }
+ imsic_vs_csr_write(IMSIC_EITHRESHOLD, mrif->eithreshold);
+ imsic_vs_csr_write(IMSIC_EIDELIVERY, mrif->eidelivery);
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_cleanup(struct imsic *imsic)
+{
+ int old_vsfile_hgei, old_vsfile_cpu;
+ unsigned long flags;
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to clear the
+ * SW-file in this function because it is always called when the
+ * VCPU is being destroyed.
+ */
+
+ write_lock_irqsave(&imsic->vsfile_lock, flags);
+ old_vsfile_hgei = imsic->vsfile_hgei;
+ old_vsfile_cpu = imsic->vsfile_cpu;
+ imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
+ imsic->vsfile_va = NULL;
+ imsic->vsfile_pa = 0;
+ write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+
+ if (old_vsfile_cpu >= 0)
+ kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+}
+
+static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ struct imsic_mrif *mrif = imsic->swfile;
+
+ if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
+ imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
+ else
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+}
+
+static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
+ struct imsic_mrif *mrif)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to read and
+ * write SW-file and MRIF in this function because it is always
+ * called when VCPU is not using SW-file and the MRIF points to
+ * a temporary MRIF on stack.
+ */
+
+ memcpy(mrif, imsic->swfile, sizeof(*mrif));
+ if (clear) {
+ memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+ }
+}
+
+static void imsic_swfile_update(struct kvm_vcpu *vcpu,
+ struct imsic_mrif *mrif)
+{
+ u32 i;
+ struct imsic_mrif_eix *seix, *eix;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ struct imsic_mrif *smrif = imsic->swfile;
+
+ imsic_mrif_atomic_write(smrif, &smrif->eidelivery, mrif->eidelivery);
+ imsic_mrif_atomic_write(smrif, &smrif->eithreshold, mrif->eithreshold);
+ for (i = 0; i < imsic->nr_eix; i++) {
+ seix = &smrif->eix[i];
+ eix = &mrif->eix[i];
+ imsic_mrif_atomic_or(smrif, &seix->eip[0], eix->eip[0]);
+ imsic_mrif_atomic_or(smrif, &seix->eie[0], eix->eie[0]);
+#ifdef CONFIG_32BIT
+ imsic_mrif_atomic_or(smrif, &seix->eip[1], eix->eip[1]);
+ imsic_mrif_atomic_or(smrif, &seix->eie[1], eix->eie[1]);
+#endif
+ }
+
+ imsic_swfile_extirq_update(vcpu);
+}
+
+void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+ struct imsic_mrif tmrif;
+ int old_vsfile_hgei, old_vsfile_cpu;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ /* Read and clear IMSIC VS-file details */
+ write_lock_irqsave(&imsic->vsfile_lock, flags);
+ old_vsfile_hgei = imsic->vsfile_hgei;
+ old_vsfile_cpu = imsic->vsfile_cpu;
+ imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
+ imsic->vsfile_va = NULL;
+ imsic->vsfile_pa = 0;
+ write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ /* Do nothing, if no IMSIC VS-file to release */
+ if (old_vsfile_cpu < 0)
+ return;
+
+ /*
+ * At this point, all interrupt producers are still using
+ * the old IMSIC VS-file so we first re-direct all interrupt
+ * producers.
+ */
+
+ /* Purge the G-stage mapping */
+ kvm_riscv_gstage_iounmap(vcpu->kvm,
+ vcpu->arch.aia_context.imsic_addr,
+ IMSIC_MMIO_PAGE_SZ);
+
+ /* TODO: Purge the IOMMU mapping ??? */
+
+ /*
+ * At this point, all interrupt producers have been re-directed
+ * to somewhere else so we move register state from the old IMSIC
+ * VS-file to the IMSIC SW-file.
+ */
+
+ /* Read and clear register state from old IMSIC VS-file */
+ memset(&tmrif, 0, sizeof(tmrif));
+ imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu, imsic->nr_hw_eix,
+ true, &tmrif);
+
+ /* Update register state in IMSIC SW-file */
+ imsic_swfile_update(vcpu, &tmrif);
+
+ /* Free-up old IMSIC VS-file */
+ kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+}
+
+int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+ phys_addr_t new_vsfile_pa;
+ struct imsic_mrif tmrif;
+ void __iomem *new_vsfile_va;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
+ struct imsic *imsic = vaia->imsic_state;
+ int ret = 0, new_vsfile_hgei = -1, old_vsfile_hgei, old_vsfile_cpu;
+
+ /* Do nothing for emulation mode */
+ if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
+ return 1;
+
+ /* Read old IMSIC VS-file details */
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ old_vsfile_hgei = imsic->vsfile_hgei;
+ old_vsfile_cpu = imsic->vsfile_cpu;
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ /* Do nothing if we are continuing on same CPU */
+ if (old_vsfile_cpu == vcpu->cpu)
+ return 1;
+
+ /* Allocate new IMSIC VS-file */
+ ret = kvm_riscv_aia_alloc_hgei(vcpu->cpu, vcpu,
+ &new_vsfile_va, &new_vsfile_pa);
+ if (ret <= 0) {
+ /* For HW acceleration mode, we can't continue */
+ if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_HWACCEL) {
+ run->fail_entry.hardware_entry_failure_reason =
+ CSR_HSTATUS;
+ run->fail_entry.cpu = vcpu->cpu;
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ return 0;
+ }
+
+ /* Release old IMSIC VS-file */
+ if (old_vsfile_cpu >= 0)
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ /* For automatic mode, we continue */
+ goto done;
+ }
+ new_vsfile_hgei = ret;
+
+ /*
+ * At this point, all interrupt producers are still using
+ * to the old IMSIC VS-file so we first move all interrupt
+ * producers to the new IMSIC VS-file.
+ */
+
+ /* Zero-out new IMSIC VS-file */
+ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
+
+ /* Update G-stage mapping for the new IMSIC VS-file */
+ ret = kvm_riscv_gstage_ioremap(kvm, vcpu->arch.aia_context.imsic_addr,
+ new_vsfile_pa, IMSIC_MMIO_PAGE_SZ,
+ true, true);
+ if (ret)
+ goto fail_free_vsfile_hgei;
+
+ /* TODO: Update the IOMMU mapping ??? */
+
+ /* Update new IMSIC VS-file details in IMSIC context */
+ write_lock_irqsave(&imsic->vsfile_lock, flags);
+ imsic->vsfile_hgei = new_vsfile_hgei;
+ imsic->vsfile_cpu = vcpu->cpu;
+ imsic->vsfile_va = new_vsfile_va;
+ imsic->vsfile_pa = new_vsfile_pa;
+ write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ /*
+ * At this point, all interrupt producers have been moved
+ * to the new IMSIC VS-file so we move register state from
+ * the old IMSIC VS/SW-file to the new IMSIC VS-file.
+ */
+
+ memset(&tmrif, 0, sizeof(tmrif));
+ if (old_vsfile_cpu >= 0) {
+ /* Read and clear register state from old IMSIC VS-file */
+ imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu,
+ imsic->nr_hw_eix, true, &tmrif);
+
+ /* Free-up old IMSIC VS-file */
+ kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+ } else {
+ /* Read and clear register state from IMSIC SW-file */
+ imsic_swfile_read(vcpu, true, &tmrif);
+ }
+
+ /* Restore register state in the new IMSIC VS-file */
+ imsic_vsfile_local_update(new_vsfile_hgei, imsic->nr_hw_eix, &tmrif);
+
+done:
+ /* Set VCPU HSTATUS.VGEIN to new IMSIC VS-file */
+ vcpu->arch.guest_context.hstatus &= ~HSTATUS_VGEIN;
+ if (new_vsfile_hgei > 0)
+ vcpu->arch.guest_context.hstatus |=
+ ((unsigned long)new_vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+
+ /* Continue run-loop */
+ return 1;
+
+fail_free_vsfile_hgei:
+ kvm_riscv_aia_free_hgei(vcpu->cpu, new_vsfile_hgei);
+ return ret;
+}
+
+int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask)
+{
+ u32 topei;
+ struct imsic_mrif_eix *eix;
+ int r, rc = KVM_INSN_CONTINUE_NEXT_SEPC;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ if (isel == KVM_RISCV_AIA_IMSIC_TOPEI) {
+ /* Read pending and enabled interrupt with highest priority */
+ topei = imsic_mrif_topei(imsic->swfile, imsic->nr_eix,
+ imsic->nr_msis);
+ if (val)
+ *val = topei;
+
+ /* Writes ignore value and clear top pending interrupt */
+ if (topei && wr_mask) {
+ topei >>= TOPEI_ID_SHIFT;
+ if (topei) {
+ eix = &imsic->swfile->eix[topei /
+ BITS_PER_TYPE(u64)];
+ clear_bit(topei & (BITS_PER_TYPE(u64) - 1),
+ eix->eip);
+ }
+ }
+ } else {
+ r = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix, isel,
+ val, new_val, wr_mask);
+ /* Forward unknown IMSIC register to user-space */
+ if (r)
+ rc = (r == -ENOENT) ? 0 : KVM_INSN_ILLEGAL_TRAP;
+ }
+
+ if (wr_mask)
+ imsic_swfile_extirq_update(vcpu);
+
+ return rc;
+}
+
+void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ if (!imsic)
+ return;
+
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+}
+
+int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+ u32 guest_index, u32 offset, u32 iid)
+{
+ unsigned long flags;
+ struct imsic_mrif_eix *eix;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ /* We only emulate one IMSIC MMIO page for each Guest VCPU */
+ if (!imsic || !iid || guest_index ||
+ (offset != IMSIC_MMIO_SETIPNUM_LE &&
+ offset != IMSIC_MMIO_SETIPNUM_BE))
+ return -ENODEV;
+
+ iid = (offset == IMSIC_MMIO_SETIPNUM_BE) ? __swab32(iid) : iid;
+ if (imsic->nr_msis <= iid)
+ return -EINVAL;
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+
+ if (imsic->vsfile_cpu >= 0) {
+ writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
+ kvm_vcpu_kick(vcpu);
+ } else {
+ eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
+ set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);
+ imsic_swfile_extirq_update(vcpu);
+ }
+
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ return 0;
+}
+
+static int imsic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ if (len != 4 || (addr & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ *((u32 *)val) = 0;
+
+ return 0;
+}
+
+static int imsic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ struct kvm_msi msi = { 0 };
+
+ if (len != 4 || (addr & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ msi.address_hi = addr >> 32;
+ msi.address_lo = (u32)addr;
+ msi.data = *((const u32 *)val);
+ kvm_riscv_aia_inject_msi(vcpu->kvm, &msi);
+
+ return 0;
+};
+
+static struct kvm_io_device_ops imsic_iodoev_ops = {
+ .read = imsic_mmio_read,
+ .write = imsic_mmio_write,
+};
+
+int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+ struct imsic *imsic;
+ struct page *swfile_page;
+ struct kvm *kvm = vcpu->kvm;
+
+ /* Fail if we have zero IDs */
+ if (!kvm->arch.aia.nr_ids)
+ return -EINVAL;
+
+ /* Allocate IMSIC context */
+ imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
+ if (!imsic)
+ return -ENOMEM;
+ vcpu->arch.aia_context.imsic_state = imsic;
+
+ /* Setup IMSIC context */
+ imsic->nr_msis = kvm->arch.aia.nr_ids + 1;
+ rwlock_init(&imsic->vsfile_lock);
+ imsic->nr_eix = BITS_TO_U64(imsic->nr_msis);
+ imsic->nr_hw_eix = BITS_TO_U64(kvm_riscv_aia_max_ids);
+ imsic->vsfile_hgei = imsic->vsfile_cpu = -1;
+
+ /* Setup IMSIC SW-file */
+ swfile_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(sizeof(*imsic->swfile)));
+ if (!swfile_page) {
+ ret = -ENOMEM;
+ goto fail_free_imsic;
+ }
+ imsic->swfile = page_to_virt(swfile_page);
+ imsic->swfile_pa = page_to_phys(swfile_page);
+
+ /* Setup IO device */
+ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ vcpu->arch.aia_context.imsic_addr,
+ KVM_DEV_RISCV_IMSIC_SIZE,
+ &imsic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret)
+ goto fail_free_swfile;
+
+ return 0;
+
+fail_free_swfile:
+ free_pages((unsigned long)imsic->swfile,
+ get_order(sizeof(*imsic->swfile)));
+fail_free_imsic:
+ vcpu->arch.aia_context.imsic_state = NULL;
+ kfree(imsic);
+ return ret;
+}
+
+void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ if (!imsic)
+ return;
+
+ imsic_vsfile_cleanup(imsic);
+
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &imsic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+
+ free_pages((unsigned long)imsic->swfile,
+ get_order(sizeof(*imsic->swfile)));
+
+ vcpu->arch.aia_context.imsic_state = NULL;
+ kfree(imsic);
+}
--
2.34.1


2023-06-15 05:45:12

by Yong-Xuan Wang

[permalink] [raw]
Subject: Re: [PATCH v2 07/10] RISC-V: KVM: Add in-kernel emulation of AIA APLIC

Hi Anup,

On Mon, Jun 12, 2023 at 1:42 PM Anup Patel <[email protected]> wrote:
>
> There is no virtualization support in AIA APLIC so we add in-kernel
> emulation of AIA APLIC which only supports MSI-mode (i.e. wired
> interrupts forwarded to AIA IMSIC as MSIs).
>
> Signed-off-by: Anup Patel <[email protected]>
> ---
> arch/riscv/include/asm/kvm_aia.h | 17 +-
> arch/riscv/kvm/Makefile | 1 +
> arch/riscv/kvm/aia_aplic.c | 574 +++++++++++++++++++++++++++++++
> 3 files changed, 578 insertions(+), 14 deletions(-)
> create mode 100644 arch/riscv/kvm/aia_aplic.c
>
> diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> index a1281ebc9b92..f6bd8523395f 100644
> --- a/arch/riscv/include/asm/kvm_aia.h
> +++ b/arch/riscv/include/asm/kvm_aia.h
> @@ -129,20 +129,9 @@ static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
> {
> }
>
> -static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
> - u32 source, bool level)
> -{
> - return 0;
> -}
> -
> -static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
> -{
> - return 0;
> -}
> -
> -static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
> -{
> -}
> +int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level);
> +int kvm_riscv_aia_aplic_init(struct kvm *kvm);
> +void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm);
>
> #ifdef CONFIG_32BIT
> void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
> index dd69ebe098bd..94c43702c765 100644
> --- a/arch/riscv/kvm/Makefile
> +++ b/arch/riscv/kvm/Makefile
> @@ -28,3 +28,4 @@ kvm-y += vcpu_timer.o
> kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
> kvm-y += aia.o
> kvm-y += aia_device.o
> +kvm-y += aia_aplic.o
> diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
> new file mode 100644
> index 000000000000..1b0a4df64815
> --- /dev/null
> +++ b/arch/riscv/kvm/aia_aplic.c
> @@ -0,0 +1,574 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + *
> + * Authors:
> + * Anup Patel <[email protected]>
> + */
> +
> +#include <linux/kvm_host.h>
> +#include <linux/math.h>
> +#include <linux/spinlock.h>
> +#include <linux/swab.h>
> +#include <kvm/iodev.h>
> +#include <asm/kvm_aia_aplic.h>
> +
> +struct aplic_irq {
> + raw_spinlock_t lock;
> + u32 sourcecfg;
> + u32 state;
> +#define APLIC_IRQ_STATE_PENDING BIT(0)
> +#define APLIC_IRQ_STATE_ENABLED BIT(1)
> +#define APLIC_IRQ_STATE_ENPEND (APLIC_IRQ_STATE_PENDING | \
> + APLIC_IRQ_STATE_ENABLED)
> +#define APLIC_IRQ_STATE_INPUT BIT(8)
> + u32 target;
> +};
> +
> +struct aplic {
> + struct kvm_io_device iodev;
> +
> + u32 domaincfg;
> + u32 genmsi;
> +
> + u32 nr_irqs;
> + u32 nr_words;
> + struct aplic_irq *irqs;
> +};
> +
> +static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq)
> +{
> + u32 ret;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return 0;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + ret = irqd->sourcecfg;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + return ret;
> +}
> +
> +static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val)
> +{
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return;
> + irqd = &aplic->irqs[irq];
> +
> + if (val & APLIC_SOURCECFG_D)
> + val = 0;
> + else
> + val &= APLIC_SOURCECFG_SM_MASK;
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + irqd->sourcecfg = val;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +}
> +
> +static u32 aplic_read_target(struct aplic *aplic, u32 irq)
> +{
> + u32 ret;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return 0;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + ret = irqd->target;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + return ret;
> +}
> +
> +static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val)
> +{
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return;
> + irqd = &aplic->irqs[irq];
> +
> + val &= APLIC_TARGET_EIID_MASK |
> + (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) |
> + (APLIC_TARGET_GUEST_IDX_MASK << APLIC_TARGET_GUEST_IDX_SHIFT);
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + irqd->target = val;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +}
> +
> +static bool aplic_read_pending(struct aplic *aplic, u32 irq)
> +{
> + bool ret;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return false;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + return ret;
> +}
> +
> +static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
> +{
> + unsigned long flags, sm;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> +
> + sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
> + if (!pending &&
> + ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
> + (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
> + goto skip_write_pending;
> +
> + if (pending)
> + irqd->state |= APLIC_IRQ_STATE_PENDING;
> + else
> + irqd->state &= ~APLIC_IRQ_STATE_PENDING;
> +
> +skip_write_pending:
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +}
> +
> +static bool aplic_read_enabled(struct aplic *aplic, u32 irq)
> +{
> + bool ret;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return false;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + return ret;
> +}
> +
> +static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
> +{
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + if (enabled)
> + irqd->state |= APLIC_IRQ_STATE_ENABLED;
> + else
> + irqd->state &= ~APLIC_IRQ_STATE_ENABLED;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +}
> +
> +static bool aplic_read_input(struct aplic *aplic, u32 irq)
> +{
> + bool ret;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> +
> + if (!irq || aplic->nr_irqs <= irq)
> + return false;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> + ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + return ret;
> +}
> +
> +static void aplic_inject_msi(struct kvm *kvm, u32 irq, u32 target)
> +{
> + u32 hart_idx, guest_idx, eiid;
> +
> + hart_idx = target >> APLIC_TARGET_HART_IDX_SHIFT;
> + hart_idx &= APLIC_TARGET_HART_IDX_MASK;
> + guest_idx = target >> APLIC_TARGET_GUEST_IDX_SHIFT;
> + guest_idx &= APLIC_TARGET_GUEST_IDX_MASK;
> + eiid = target & APLIC_TARGET_EIID_MASK;
> + kvm_riscv_aia_inject_msi_by_id(kvm, hart_idx, guest_idx, eiid);
> +}
> +
> +static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last)
> +{
> + bool inject;
> + u32 irq, target;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> + struct aplic *aplic = kvm->arch.aia.aplic_state;
> +
> + if (!(aplic->domaincfg & APLIC_DOMAINCFG_IE))
> + return;
> +
> + for (irq = first; irq <= last; irq++) {
> + if (!irq || aplic->nr_irqs <= irq)
> + continue;
> + irqd = &aplic->irqs[irq];
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> +
> + inject = false;
> + target = irqd->target;
> + if (irqd->state & APLIC_IRQ_STATE_ENPEND) {
> + irqd->state &= ~APLIC_IRQ_STATE_PENDING;
> + inject = true;
> + }
> +
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + if (inject)
> + aplic_inject_msi(kvm, irq, target);
> + }
> +}
> +
> +int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level)
> +{
> + u32 target;
> + bool inject = false, ie;
> + unsigned long flags;
> + struct aplic_irq *irqd;
> + struct aplic *aplic = kvm->arch.aia.aplic_state;
> +
> + if (!aplic || !source || (aplic->nr_irqs <= source))
> + return -ENODEV;
> + irqd = &aplic->irqs[source];
> + ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false;
> +
> + raw_spin_lock_irqsave(&irqd->lock, flags);
> +
> + if (irqd->sourcecfg & APLIC_SOURCECFG_D)
> + goto skip_unlock;
> +
> + switch (irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK) {
> + case APLIC_SOURCECFG_SM_EDGE_RISE:
> + if (level && !(irqd->state & APLIC_IRQ_STATE_INPUT) &&
> + !(irqd->state & APLIC_IRQ_STATE_PENDING))
> + irqd->state |= APLIC_IRQ_STATE_PENDING;
> + break;
> + case APLIC_SOURCECFG_SM_EDGE_FALL:
> + if (!level && (irqd->state & APLIC_IRQ_STATE_INPUT) &&
> + !(irqd->state & APLIC_IRQ_STATE_PENDING))
> + irqd->state |= APLIC_IRQ_STATE_PENDING;
> + break;
> + case APLIC_SOURCECFG_SM_LEVEL_HIGH:
> + if (level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
> + irqd->state |= APLIC_IRQ_STATE_PENDING;
> + break;
> + case APLIC_SOURCECFG_SM_LEVEL_LOW:
> + if (!level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
> + irqd->state |= APLIC_IRQ_STATE_PENDING;
> + break;
> + }
> +
> + if (level)
> + irqd->state |= APLIC_IRQ_STATE_INPUT;
> + else
> + irqd->state &= ~APLIC_IRQ_STATE_INPUT;
> +
> + target = irqd->target;
> + if (ie && (irqd->state & APLIC_IRQ_STATE_ENPEND)) {

I have tested these patches with QEMU as the VMM tool and observed
that the APLIC would continue to forward goldfish RTC interrupts even
when there were no pending interrupts. I made a modification to the
checking
((irqd->state & APLIC_IRQ_STATE_ENPEND) == APLIC_IRQ_STATE_ENPEND).
This resolved the problem, ensuring that interrupts are now forwarded
only when they are genuinely pending and enabled.

> + irqd->state &= ~APLIC_IRQ_STATE_PENDING;
> + inject = true;
> + }
> +
> +skip_unlock:
> + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> +
> + if (inject)
> + aplic_inject_msi(kvm, source, target);
> +
> + return 0;
> +}
> +
> +static u32 aplic_read_input_word(struct aplic *aplic, u32 word)
> +{
> + u32 i, ret = 0;
> +
> + for (i = 0; i < 32; i++)
> + ret |= aplic_read_input(aplic, word * 32 + i) ? BIT(i) : 0;
> +
> + return ret;
> +}
> +
> +static u32 aplic_read_pending_word(struct aplic *aplic, u32 word)
> +{
> + u32 i, ret = 0;
> +
> + for (i = 0; i < 32; i++)
> + ret |= aplic_read_pending(aplic, word * 32 + i) ? BIT(i) : 0;
> +
> + return ret;
> +}
> +
> +static void aplic_write_pending_word(struct aplic *aplic, u32 word,
> + u32 val, bool pending)
> +{
> + u32 i;
> +
> + for (i = 0; i < 32; i++) {
> + if (val & BIT(i))
> + aplic_write_pending(aplic, word * 32 + i, pending);
> + }
> +}
> +
> +static u32 aplic_read_enabled_word(struct aplic *aplic, u32 word)
> +{
> + u32 i, ret = 0;
> +
> + for (i = 0; i < 32; i++)
> + ret |= aplic_read_enabled(aplic, word * 32 + i) ? BIT(i) : 0;
> +
> + return ret;
> +}
> +
> +static void aplic_write_enabled_word(struct aplic *aplic, u32 word,
> + u32 val, bool enabled)
> +{
> + u32 i;
> +
> + for (i = 0; i < 32; i++) {
> + if (val & BIT(i))
> + aplic_write_enabled(aplic, word * 32 + i, enabled);
> + }
> +}
> +
> +static int aplic_mmio_read_offset(struct kvm *kvm, gpa_t off, u32 *val32)
> +{
> + u32 i;
> + struct aplic *aplic = kvm->arch.aia.aplic_state;
> +
> + if ((off & 0x3) != 0)
> + return -EOPNOTSUPP;
> +
> + if (off == APLIC_DOMAINCFG) {
> + *val32 = APLIC_DOMAINCFG_RDONLY |
> + aplic->domaincfg | APLIC_DOMAINCFG_DM;
> + } else if ((off >= APLIC_SOURCECFG_BASE) &&
> + (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
> + i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
> + *val32 = aplic_read_sourcecfg(aplic, i);
> + } else if ((off >= APLIC_SETIP_BASE) &&
> + (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_SETIP_BASE) >> 2;
> + *val32 = aplic_read_pending_word(aplic, i);
> + } else if (off == APLIC_SETIPNUM) {
> + *val32 = 0;
> + } else if ((off >= APLIC_CLRIP_BASE) &&
> + (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_CLRIP_BASE) >> 2;
> + *val32 = aplic_read_input_word(aplic, i);
> + } else if (off == APLIC_CLRIPNUM) {
> + *val32 = 0;
> + } else if ((off >= APLIC_SETIE_BASE) &&
> + (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_SETIE_BASE) >> 2;
> + *val32 = aplic_read_enabled_word(aplic, i);
> + } else if (off == APLIC_SETIENUM) {
> + *val32 = 0;
> + } else if ((off >= APLIC_CLRIE_BASE) &&
> + (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
> + *val32 = 0;
> + } else if (off == APLIC_CLRIENUM) {
> + *val32 = 0;
> + } else if (off == APLIC_SETIPNUM_LE) {
> + *val32 = 0;
> + } else if (off == APLIC_SETIPNUM_BE) {
> + *val32 = 0;
> + } else if (off == APLIC_GENMSI) {
> + *val32 = aplic->genmsi;
> + } else if ((off >= APLIC_TARGET_BASE) &&
> + (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
> + i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
> + *val32 = aplic_read_target(aplic, i);
> + } else
> + return -ENODEV;
> +
> + return 0;
> +}
> +
> +static int aplic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> + gpa_t addr, int len, void *val)
> +{
> + if (len != 4)
> + return -EOPNOTSUPP;
> +
> + return aplic_mmio_read_offset(vcpu->kvm,
> + addr - vcpu->kvm->arch.aia.aplic_addr,
> + val);
> +}
> +
> +static int aplic_mmio_write_offset(struct kvm *kvm, gpa_t off, u32 val32)
> +{
> + u32 i;
> + struct aplic *aplic = kvm->arch.aia.aplic_state;
> +
> + if ((off & 0x3) != 0)
> + return -EOPNOTSUPP;
> +
> + if (off == APLIC_DOMAINCFG) {
> + /* Only IE bit writeable */
> + aplic->domaincfg = val32 & APLIC_DOMAINCFG_IE;
> + } else if ((off >= APLIC_SOURCECFG_BASE) &&
> + (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
> + i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
> + aplic_write_sourcecfg(aplic, i, val32);
> + } else if ((off >= APLIC_SETIP_BASE) &&
> + (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_SETIP_BASE) >> 2;
> + aplic_write_pending_word(aplic, i, val32, true);
> + } else if (off == APLIC_SETIPNUM) {
> + aplic_write_pending(aplic, val32, true);
> + } else if ((off >= APLIC_CLRIP_BASE) &&
> + (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_CLRIP_BASE) >> 2;
> + aplic_write_pending_word(aplic, i, val32, false);
> + } else if (off == APLIC_CLRIPNUM) {
> + aplic_write_pending(aplic, val32, false);
> + } else if ((off >= APLIC_SETIE_BASE) &&
> + (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_SETIE_BASE) >> 2;
> + aplic_write_enabled_word(aplic, i, val32, true);
> + } else if (off == APLIC_SETIENUM) {
> + aplic_write_enabled(aplic, val32, true);
> + } else if ((off >= APLIC_CLRIE_BASE) &&
> + (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
> + i = (off - APLIC_CLRIE_BASE) >> 2;
> + aplic_write_enabled_word(aplic, i, val32, false);
> + } else if (off == APLIC_CLRIENUM) {
> + aplic_write_enabled(aplic, val32, false);
> + } else if (off == APLIC_SETIPNUM_LE) {
> + aplic_write_pending(aplic, val32, true);
> + } else if (off == APLIC_SETIPNUM_BE) {
> + aplic_write_pending(aplic, __swab32(val32), true);
> + } else if (off == APLIC_GENMSI) {
> + aplic->genmsi = val32 & ~(APLIC_TARGET_GUEST_IDX_MASK <<
> + APLIC_TARGET_GUEST_IDX_SHIFT);
> + kvm_riscv_aia_inject_msi_by_id(kvm,
> + val32 >> APLIC_TARGET_HART_IDX_SHIFT, 0,
> + val32 & APLIC_TARGET_EIID_MASK);
> + } else if ((off >= APLIC_TARGET_BASE) &&
> + (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
> + i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
> + aplic_write_target(aplic, i, val32);
> + } else
> + return -ENODEV;
> +
> + aplic_update_irq_range(kvm, 1, aplic->nr_irqs - 1);
> +
> + return 0;
> +}
> +
> +static int aplic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> + gpa_t addr, int len, const void *val)
> +{
> + if (len != 4)
> + return -EOPNOTSUPP;
> +
> + return aplic_mmio_write_offset(vcpu->kvm,
> + addr - vcpu->kvm->arch.aia.aplic_addr,
> + *((const u32 *)val));
> +}
> +
> +static struct kvm_io_device_ops aplic_iodoev_ops = {
> + .read = aplic_mmio_read,
> + .write = aplic_mmio_write,
> +};
> +
> +int kvm_riscv_aia_aplic_init(struct kvm *kvm)
> +{
> + int i, ret = 0;
> + struct aplic *aplic;
> +
> + /* Do nothing if we have zero sources */
> + if (!kvm->arch.aia.nr_sources)
> + return 0;
> +
> + /* Allocate APLIC global state */
> + aplic = kzalloc(sizeof(*aplic), GFP_KERNEL);
> + if (!aplic)
> + return -ENOMEM;
> + kvm->arch.aia.aplic_state = aplic;
> +
> + /* Setup APLIC IRQs */
> + aplic->nr_irqs = kvm->arch.aia.nr_sources + 1;
> + aplic->nr_words = DIV_ROUND_UP(aplic->nr_irqs, 32);
> + aplic->irqs = kcalloc(aplic->nr_irqs,
> + sizeof(*aplic->irqs), GFP_KERNEL);
> + if (!aplic->irqs) {
> + ret = -ENOMEM;
> + goto fail_free_aplic;
> + }
> + for (i = 0; i < aplic->nr_irqs; i++)
> + raw_spin_lock_init(&aplic->irqs[i].lock);
> +
> + /* Setup IO device */
> + kvm_iodevice_init(&aplic->iodev, &aplic_iodoev_ops);
> + mutex_lock(&kvm->slots_lock);
> + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
> + kvm->arch.aia.aplic_addr,
> + KVM_DEV_RISCV_APLIC_SIZE,
> + &aplic->iodev);
> + mutex_unlock(&kvm->slots_lock);
> + if (ret)
> + goto fail_free_aplic_irqs;
> +
> + /* Setup default IRQ routing */
> + ret = kvm_riscv_setup_default_irq_routing(kvm, aplic->nr_irqs);
> + if (ret)
> + goto fail_unreg_iodev;
> +
> + return 0;
> +
> +fail_unreg_iodev:
> + mutex_lock(&kvm->slots_lock);
> + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
> + mutex_unlock(&kvm->slots_lock);
> +fail_free_aplic_irqs:
> + kfree(aplic->irqs);
> +fail_free_aplic:
> + kvm->arch.aia.aplic_state = NULL;
> + kfree(aplic);
> + return ret;
> +}
> +
> +void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
> +{
> + struct aplic *aplic = kvm->arch.aia.aplic_state;
> +
> + if (!aplic)
> + return;
> +
> + mutex_lock(&kvm->slots_lock);
> + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
> + mutex_unlock(&kvm->slots_lock);
> +
> + kfree(aplic->irqs);
> +
> + kvm->arch.aia.aplic_state = NULL;
> + kfree(aplic);
> +}
> --
> 2.34.1
>
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Regards,
Yong-Xuan

2023-06-15 07:11:03

by Anup Patel

[permalink] [raw]
Subject: Re: [PATCH v2 07/10] RISC-V: KVM: Add in-kernel emulation of AIA APLIC

On Thu, Jun 15, 2023 at 11:00 AM Yong-Xuan Wang
<[email protected]> wrote:
>
> Hi Anup,
>
> On Mon, Jun 12, 2023 at 1:42 PM Anup Patel <[email protected]> wrote:
> >
> > There is no virtualization support in AIA APLIC so we add in-kernel
> > emulation of AIA APLIC which only supports MSI-mode (i.e. wired
> > interrupts forwarded to AIA IMSIC as MSIs).
> >
> > Signed-off-by: Anup Patel <[email protected]>
> > ---
> > arch/riscv/include/asm/kvm_aia.h | 17 +-
> > arch/riscv/kvm/Makefile | 1 +
> > arch/riscv/kvm/aia_aplic.c | 574 +++++++++++++++++++++++++++++++
> > 3 files changed, 578 insertions(+), 14 deletions(-)
> > create mode 100644 arch/riscv/kvm/aia_aplic.c
> >
> > diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> > index a1281ebc9b92..f6bd8523395f 100644
> > --- a/arch/riscv/include/asm/kvm_aia.h
> > +++ b/arch/riscv/include/asm/kvm_aia.h
> > @@ -129,20 +129,9 @@ static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
> > {
> > }
> >
> > -static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
> > - u32 source, bool level)
> > -{
> > - return 0;
> > -}
> > -
> > -static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
> > -{
> > - return 0;
> > -}
> > -
> > -static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
> > -{
> > -}
> > +int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level);
> > +int kvm_riscv_aia_aplic_init(struct kvm *kvm);
> > +void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm);
> >
> > #ifdef CONFIG_32BIT
> > void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
> > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
> > index dd69ebe098bd..94c43702c765 100644
> > --- a/arch/riscv/kvm/Makefile
> > +++ b/arch/riscv/kvm/Makefile
> > @@ -28,3 +28,4 @@ kvm-y += vcpu_timer.o
> > kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
> > kvm-y += aia.o
> > kvm-y += aia_device.o
> > +kvm-y += aia_aplic.o
> > diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
> > new file mode 100644
> > index 000000000000..1b0a4df64815
> > --- /dev/null
> > +++ b/arch/riscv/kvm/aia_aplic.c
> > @@ -0,0 +1,574 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> > + * Copyright (C) 2022 Ventana Micro Systems Inc.
> > + *
> > + * Authors:
> > + * Anup Patel <[email protected]>
> > + */
> > +
> > +#include <linux/kvm_host.h>
> > +#include <linux/math.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/swab.h>
> > +#include <kvm/iodev.h>
> > +#include <asm/kvm_aia_aplic.h>
> > +
> > +struct aplic_irq {
> > + raw_spinlock_t lock;
> > + u32 sourcecfg;
> > + u32 state;
> > +#define APLIC_IRQ_STATE_PENDING BIT(0)
> > +#define APLIC_IRQ_STATE_ENABLED BIT(1)
> > +#define APLIC_IRQ_STATE_ENPEND (APLIC_IRQ_STATE_PENDING | \
> > + APLIC_IRQ_STATE_ENABLED)
> > +#define APLIC_IRQ_STATE_INPUT BIT(8)
> > + u32 target;
> > +};
> > +
> > +struct aplic {
> > + struct kvm_io_device iodev;
> > +
> > + u32 domaincfg;
> > + u32 genmsi;
> > +
> > + u32 nr_irqs;
> > + u32 nr_words;
> > + struct aplic_irq *irqs;
> > +};
> > +
> > +static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq)
> > +{
> > + u32 ret;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return 0;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + ret = irqd->sourcecfg;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val)
> > +{
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return;
> > + irqd = &aplic->irqs[irq];
> > +
> > + if (val & APLIC_SOURCECFG_D)
> > + val = 0;
> > + else
> > + val &= APLIC_SOURCECFG_SM_MASK;
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + irqd->sourcecfg = val;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +}
> > +
> > +static u32 aplic_read_target(struct aplic *aplic, u32 irq)
> > +{
> > + u32 ret;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return 0;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + ret = irqd->target;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val)
> > +{
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return;
> > + irqd = &aplic->irqs[irq];
> > +
> > + val &= APLIC_TARGET_EIID_MASK |
> > + (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) |
> > + (APLIC_TARGET_GUEST_IDX_MASK << APLIC_TARGET_GUEST_IDX_SHIFT);
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + irqd->target = val;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +}
> > +
> > +static bool aplic_read_pending(struct aplic *aplic, u32 irq)
> > +{
> > + bool ret;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return false;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
> > +{
> > + unsigned long flags, sm;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > +
> > + sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
> > + if (!pending &&
> > + ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
> > + (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
> > + goto skip_write_pending;
> > +
> > + if (pending)
> > + irqd->state |= APLIC_IRQ_STATE_PENDING;
> > + else
> > + irqd->state &= ~APLIC_IRQ_STATE_PENDING;
> > +
> > +skip_write_pending:
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +}
> > +
> > +static bool aplic_read_enabled(struct aplic *aplic, u32 irq)
> > +{
> > + bool ret;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return false;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
> > +{
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + if (enabled)
> > + irqd->state |= APLIC_IRQ_STATE_ENABLED;
> > + else
> > + irqd->state &= ~APLIC_IRQ_STATE_ENABLED;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +}
> > +
> > +static bool aplic_read_input(struct aplic *aplic, u32 irq)
> > +{
> > + bool ret;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > +
> > + if (!irq || aplic->nr_irqs <= irq)
> > + return false;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > + ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_inject_msi(struct kvm *kvm, u32 irq, u32 target)
> > +{
> > + u32 hart_idx, guest_idx, eiid;
> > +
> > + hart_idx = target >> APLIC_TARGET_HART_IDX_SHIFT;
> > + hart_idx &= APLIC_TARGET_HART_IDX_MASK;
> > + guest_idx = target >> APLIC_TARGET_GUEST_IDX_SHIFT;
> > + guest_idx &= APLIC_TARGET_GUEST_IDX_MASK;
> > + eiid = target & APLIC_TARGET_EIID_MASK;
> > + kvm_riscv_aia_inject_msi_by_id(kvm, hart_idx, guest_idx, eiid);
> > +}
> > +
> > +static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last)
> > +{
> > + bool inject;
> > + u32 irq, target;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > + struct aplic *aplic = kvm->arch.aia.aplic_state;
> > +
> > + if (!(aplic->domaincfg & APLIC_DOMAINCFG_IE))
> > + return;
> > +
> > + for (irq = first; irq <= last; irq++) {
> > + if (!irq || aplic->nr_irqs <= irq)
> > + continue;
> > + irqd = &aplic->irqs[irq];
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > +
> > + inject = false;
> > + target = irqd->target;
> > + if (irqd->state & APLIC_IRQ_STATE_ENPEND) {
> > + irqd->state &= ~APLIC_IRQ_STATE_PENDING;
> > + inject = true;
> > + }
> > +
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + if (inject)
> > + aplic_inject_msi(kvm, irq, target);
> > + }
> > +}
> > +
> > +int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level)
> > +{
> > + u32 target;
> > + bool inject = false, ie;
> > + unsigned long flags;
> > + struct aplic_irq *irqd;
> > + struct aplic *aplic = kvm->arch.aia.aplic_state;
> > +
> > + if (!aplic || !source || (aplic->nr_irqs <= source))
> > + return -ENODEV;
> > + irqd = &aplic->irqs[source];
> > + ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false;
> > +
> > + raw_spin_lock_irqsave(&irqd->lock, flags);
> > +
> > + if (irqd->sourcecfg & APLIC_SOURCECFG_D)
> > + goto skip_unlock;
> > +
> > + switch (irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK) {
> > + case APLIC_SOURCECFG_SM_EDGE_RISE:
> > + if (level && !(irqd->state & APLIC_IRQ_STATE_INPUT) &&
> > + !(irqd->state & APLIC_IRQ_STATE_PENDING))
> > + irqd->state |= APLIC_IRQ_STATE_PENDING;
> > + break;
> > + case APLIC_SOURCECFG_SM_EDGE_FALL:
> > + if (!level && (irqd->state & APLIC_IRQ_STATE_INPUT) &&
> > + !(irqd->state & APLIC_IRQ_STATE_PENDING))
> > + irqd->state |= APLIC_IRQ_STATE_PENDING;
> > + break;
> > + case APLIC_SOURCECFG_SM_LEVEL_HIGH:
> > + if (level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
> > + irqd->state |= APLIC_IRQ_STATE_PENDING;
> > + break;
> > + case APLIC_SOURCECFG_SM_LEVEL_LOW:
> > + if (!level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
> > + irqd->state |= APLIC_IRQ_STATE_PENDING;
> > + break;
> > + }
> > +
> > + if (level)
> > + irqd->state |= APLIC_IRQ_STATE_INPUT;
> > + else
> > + irqd->state &= ~APLIC_IRQ_STATE_INPUT;
> > +
> > + target = irqd->target;
> > + if (ie && (irqd->state & APLIC_IRQ_STATE_ENPEND)) {
>
> I have tested these patches with QEMU as the VMM tool and observed
> that the APLIC would continue to forward goldfish RTC interrupts even
> when there were no pending interrupts. I made a modification to the
> checking
> ((irqd->state & APLIC_IRQ_STATE_ENPEND) == APLIC_IRQ_STATE_ENPEND).
> This resolved the problem, ensuring that interrupts are now forwarded
> only when they are genuinely pending and enabled.

Good catch. I never saw this issue on KVMTOOL. I will update
it in the next revision.

Thanks,
Anup

>
> > + irqd->state &= ~APLIC_IRQ_STATE_PENDING;
> > + inject = true;
> > + }
> > +
> > +skip_unlock:
> > + raw_spin_unlock_irqrestore(&irqd->lock, flags);
> > +
> > + if (inject)
> > + aplic_inject_msi(kvm, source, target);
> > +
> > + return 0;
> > +}
> > +
> > +static u32 aplic_read_input_word(struct aplic *aplic, u32 word)
> > +{
> > + u32 i, ret = 0;
> > +
> > + for (i = 0; i < 32; i++)
> > + ret |= aplic_read_input(aplic, word * 32 + i) ? BIT(i) : 0;
> > +
> > + return ret;
> > +}
> > +
> > +static u32 aplic_read_pending_word(struct aplic *aplic, u32 word)
> > +{
> > + u32 i, ret = 0;
> > +
> > + for (i = 0; i < 32; i++)
> > + ret |= aplic_read_pending(aplic, word * 32 + i) ? BIT(i) : 0;
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_write_pending_word(struct aplic *aplic, u32 word,
> > + u32 val, bool pending)
> > +{
> > + u32 i;
> > +
> > + for (i = 0; i < 32; i++) {
> > + if (val & BIT(i))
> > + aplic_write_pending(aplic, word * 32 + i, pending);
> > + }
> > +}
> > +
> > +static u32 aplic_read_enabled_word(struct aplic *aplic, u32 word)
> > +{
> > + u32 i, ret = 0;
> > +
> > + for (i = 0; i < 32; i++)
> > + ret |= aplic_read_enabled(aplic, word * 32 + i) ? BIT(i) : 0;
> > +
> > + return ret;
> > +}
> > +
> > +static void aplic_write_enabled_word(struct aplic *aplic, u32 word,
> > + u32 val, bool enabled)
> > +{
> > + u32 i;
> > +
> > + for (i = 0; i < 32; i++) {
> > + if (val & BIT(i))
> > + aplic_write_enabled(aplic, word * 32 + i, enabled);
> > + }
> > +}
> > +
> > +static int aplic_mmio_read_offset(struct kvm *kvm, gpa_t off, u32 *val32)
> > +{
> > + u32 i;
> > + struct aplic *aplic = kvm->arch.aia.aplic_state;
> > +
> > + if ((off & 0x3) != 0)
> > + return -EOPNOTSUPP;
> > +
> > + if (off == APLIC_DOMAINCFG) {
> > + *val32 = APLIC_DOMAINCFG_RDONLY |
> > + aplic->domaincfg | APLIC_DOMAINCFG_DM;
> > + } else if ((off >= APLIC_SOURCECFG_BASE) &&
> > + (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
> > + i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
> > + *val32 = aplic_read_sourcecfg(aplic, i);
> > + } else if ((off >= APLIC_SETIP_BASE) &&
> > + (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_SETIP_BASE) >> 2;
> > + *val32 = aplic_read_pending_word(aplic, i);
> > + } else if (off == APLIC_SETIPNUM) {
> > + *val32 = 0;
> > + } else if ((off >= APLIC_CLRIP_BASE) &&
> > + (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_CLRIP_BASE) >> 2;
> > + *val32 = aplic_read_input_word(aplic, i);
> > + } else if (off == APLIC_CLRIPNUM) {
> > + *val32 = 0;
> > + } else if ((off >= APLIC_SETIE_BASE) &&
> > + (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_SETIE_BASE) >> 2;
> > + *val32 = aplic_read_enabled_word(aplic, i);
> > + } else if (off == APLIC_SETIENUM) {
> > + *val32 = 0;
> > + } else if ((off >= APLIC_CLRIE_BASE) &&
> > + (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
> > + *val32 = 0;
> > + } else if (off == APLIC_CLRIENUM) {
> > + *val32 = 0;
> > + } else if (off == APLIC_SETIPNUM_LE) {
> > + *val32 = 0;
> > + } else if (off == APLIC_SETIPNUM_BE) {
> > + *val32 = 0;
> > + } else if (off == APLIC_GENMSI) {
> > + *val32 = aplic->genmsi;
> > + } else if ((off >= APLIC_TARGET_BASE) &&
> > + (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
> > + i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
> > + *val32 = aplic_read_target(aplic, i);
> > + } else
> > + return -ENODEV;
> > +
> > + return 0;
> > +}
> > +
> > +static int aplic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> > + gpa_t addr, int len, void *val)
> > +{
> > + if (len != 4)
> > + return -EOPNOTSUPP;
> > +
> > + return aplic_mmio_read_offset(vcpu->kvm,
> > + addr - vcpu->kvm->arch.aia.aplic_addr,
> > + val);
> > +}
> > +
> > +static int aplic_mmio_write_offset(struct kvm *kvm, gpa_t off, u32 val32)
> > +{
> > + u32 i;
> > + struct aplic *aplic = kvm->arch.aia.aplic_state;
> > +
> > + if ((off & 0x3) != 0)
> > + return -EOPNOTSUPP;
> > +
> > + if (off == APLIC_DOMAINCFG) {
> > + /* Only IE bit writeable */
> > + aplic->domaincfg = val32 & APLIC_DOMAINCFG_IE;
> > + } else if ((off >= APLIC_SOURCECFG_BASE) &&
> > + (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
> > + i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
> > + aplic_write_sourcecfg(aplic, i, val32);
> > + } else if ((off >= APLIC_SETIP_BASE) &&
> > + (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_SETIP_BASE) >> 2;
> > + aplic_write_pending_word(aplic, i, val32, true);
> > + } else if (off == APLIC_SETIPNUM) {
> > + aplic_write_pending(aplic, val32, true);
> > + } else if ((off >= APLIC_CLRIP_BASE) &&
> > + (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_CLRIP_BASE) >> 2;
> > + aplic_write_pending_word(aplic, i, val32, false);
> > + } else if (off == APLIC_CLRIPNUM) {
> > + aplic_write_pending(aplic, val32, false);
> > + } else if ((off >= APLIC_SETIE_BASE) &&
> > + (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_SETIE_BASE) >> 2;
> > + aplic_write_enabled_word(aplic, i, val32, true);
> > + } else if (off == APLIC_SETIENUM) {
> > + aplic_write_enabled(aplic, val32, true);
> > + } else if ((off >= APLIC_CLRIE_BASE) &&
> > + (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
> > + i = (off - APLIC_CLRIE_BASE) >> 2;
> > + aplic_write_enabled_word(aplic, i, val32, false);
> > + } else if (off == APLIC_CLRIENUM) {
> > + aplic_write_enabled(aplic, val32, false);
> > + } else if (off == APLIC_SETIPNUM_LE) {
> > + aplic_write_pending(aplic, val32, true);
> > + } else if (off == APLIC_SETIPNUM_BE) {
> > + aplic_write_pending(aplic, __swab32(val32), true);
> > + } else if (off == APLIC_GENMSI) {
> > + aplic->genmsi = val32 & ~(APLIC_TARGET_GUEST_IDX_MASK <<
> > + APLIC_TARGET_GUEST_IDX_SHIFT);
> > + kvm_riscv_aia_inject_msi_by_id(kvm,
> > + val32 >> APLIC_TARGET_HART_IDX_SHIFT, 0,
> > + val32 & APLIC_TARGET_EIID_MASK);
> > + } else if ((off >= APLIC_TARGET_BASE) &&
> > + (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
> > + i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
> > + aplic_write_target(aplic, i, val32);
> > + } else
> > + return -ENODEV;
> > +
> > + aplic_update_irq_range(kvm, 1, aplic->nr_irqs - 1);
> > +
> > + return 0;
> > +}
> > +
> > +static int aplic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> > + gpa_t addr, int len, const void *val)
> > +{
> > + if (len != 4)
> > + return -EOPNOTSUPP;
> > +
> > + return aplic_mmio_write_offset(vcpu->kvm,
> > + addr - vcpu->kvm->arch.aia.aplic_addr,
> > + *((const u32 *)val));
> > +}
> > +
> > +static struct kvm_io_device_ops aplic_iodoev_ops = {
> > + .read = aplic_mmio_read,
> > + .write = aplic_mmio_write,
> > +};
> > +
> > +int kvm_riscv_aia_aplic_init(struct kvm *kvm)
> > +{
> > + int i, ret = 0;
> > + struct aplic *aplic;
> > +
> > + /* Do nothing if we have zero sources */
> > + if (!kvm->arch.aia.nr_sources)
> > + return 0;
> > +
> > + /* Allocate APLIC global state */
> > + aplic = kzalloc(sizeof(*aplic), GFP_KERNEL);
> > + if (!aplic)
> > + return -ENOMEM;
> > + kvm->arch.aia.aplic_state = aplic;
> > +
> > + /* Setup APLIC IRQs */
> > + aplic->nr_irqs = kvm->arch.aia.nr_sources + 1;
> > + aplic->nr_words = DIV_ROUND_UP(aplic->nr_irqs, 32);
> > + aplic->irqs = kcalloc(aplic->nr_irqs,
> > + sizeof(*aplic->irqs), GFP_KERNEL);
> > + if (!aplic->irqs) {
> > + ret = -ENOMEM;
> > + goto fail_free_aplic;
> > + }
> > + for (i = 0; i < aplic->nr_irqs; i++)
> > + raw_spin_lock_init(&aplic->irqs[i].lock);
> > +
> > + /* Setup IO device */
> > + kvm_iodevice_init(&aplic->iodev, &aplic_iodoev_ops);
> > + mutex_lock(&kvm->slots_lock);
> > + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
> > + kvm->arch.aia.aplic_addr,
> > + KVM_DEV_RISCV_APLIC_SIZE,
> > + &aplic->iodev);
> > + mutex_unlock(&kvm->slots_lock);
> > + if (ret)
> > + goto fail_free_aplic_irqs;
> > +
> > + /* Setup default IRQ routing */
> > + ret = kvm_riscv_setup_default_irq_routing(kvm, aplic->nr_irqs);
> > + if (ret)
> > + goto fail_unreg_iodev;
> > +
> > + return 0;
> > +
> > +fail_unreg_iodev:
> > + mutex_lock(&kvm->slots_lock);
> > + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
> > + mutex_unlock(&kvm->slots_lock);
> > +fail_free_aplic_irqs:
> > + kfree(aplic->irqs);
> > +fail_free_aplic:
> > + kvm->arch.aia.aplic_state = NULL;
> > + kfree(aplic);
> > + return ret;
> > +}
> > +
> > +void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
> > +{
> > + struct aplic *aplic = kvm->arch.aia.aplic_state;
> > +
> > + if (!aplic)
> > + return;
> > +
> > + mutex_lock(&kvm->slots_lock);
> > + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
> > + mutex_unlock(&kvm->slots_lock);
> > +
> > + kfree(aplic->irqs);
> > +
> > + kvm->arch.aia.aplic_state = NULL;
> > + kfree(aplic);
> > +}
> > --
> > 2.34.1
> >
> >
> > _______________________________________________
> > linux-riscv mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
> Regards,
> Yong-Xuan