This patchset adds pv ipi support for VM. On a physical machine, ipi HW
uses IOCSR registers, however there is a trap into the hypervisor when a vcpu
accesses IOCSR registers if the system is in VM mode. SWI is an interrupt
mechanism like SGI on ARM: software can send an interrupt to a CPU, except that
on LoongArch SWI can currently only be sent to the local CPU. So SWI cannot be
used for IPI on real HW systems, however it can be used on a VM when combined
with the hypercall method. This patch uses the SWI interrupt for the IPI
mechanism, and SWI injection uses the hypercall method. There is one trap for
IPI sending, however with the SWI interrupt handler there is no trap.
Here is the microbenchmark data with the perf bench futex wake case on a 3C5000
single-way machine; there are 16 cpus on a 3C5000 single-way machine, and the VM
has 16 vcpus as well. The benchmark data is the time in ms to wake up 16 threads;
performance is higher if the value is smaller.
perf bench futex wake, Wokeup 16 of 16 threads in ms
--physical machine-- --VM original-- --VM with pv ipi patch--
0.0176 ms 0.1140 ms 0.0481 ms
---
Change in V3:
1. Add 128 vcpu ipi multicast support like x86
2. Change cpucfg base address from 0x10000000 to 0x40000000 which is
used to detect the hypervisor type, in order to avoid conflicts with future
hw usage
3. Adjust patch order in this patchset, move patch
Refine-ipi-ops-on-LoongArch-platform to the first one.
Change in V2:
1. Add hw cpuid map support since ipi routing uses hw cpuid
2. Refine changelog description
3. Add hypercall statistic support for vcpu
4. Set percpu pv ipi message buffer aligned with cacheline
5. Refine pv ipi send logic, do not send an ipi message if there is
already a pending ipi message.
---
Bibo Mao (6):
LoongArch/smp: Refine ipi ops on LoongArch platform
LoongArch: KVM: Add hypercall instruction emulation support
LoongArch: KVM: Add cpucfg area for kvm hypervisor
LoongArch: Add paravirt interface for guest kernel
LoongArch: KVM: Add physical cpuid map support
LoongArch: Add pv ipi support on LoongArch system
arch/loongarch/Kconfig | 9 +
arch/loongarch/include/asm/Kbuild | 1 -
arch/loongarch/include/asm/hardirq.h | 5 +
arch/loongarch/include/asm/inst.h | 1 +
arch/loongarch/include/asm/irq.h | 10 +-
arch/loongarch/include/asm/kvm_host.h | 27 +++
arch/loongarch/include/asm/kvm_para.h | 157 ++++++++++++++++++
arch/loongarch/include/asm/kvm_vcpu.h | 1 +
arch/loongarch/include/asm/loongarch.h | 11 ++
arch/loongarch/include/asm/paravirt.h | 27 +++
.../include/asm/paravirt_api_clock.h | 1 +
arch/loongarch/include/asm/smp.h | 31 ++--
arch/loongarch/include/uapi/asm/Kbuild | 2 -
arch/loongarch/kernel/Makefile | 1 +
arch/loongarch/kernel/irq.c | 24 +--
arch/loongarch/kernel/paravirt.c | 154 +++++++++++++++++
arch/loongarch/kernel/perf_event.c | 14 +-
arch/loongarch/kernel/setup.c | 2 +
arch/loongarch/kernel/smp.c | 60 ++++---
arch/loongarch/kernel/time.c | 12 +-
arch/loongarch/kvm/exit.c | 125 ++++++++++++--
arch/loongarch/kvm/vcpu.c | 94 ++++++++++-
arch/loongarch/kvm/vm.c | 11 ++
23 files changed, 678 insertions(+), 102 deletions(-)
create mode 100644 arch/loongarch/include/asm/kvm_para.h
create mode 100644 arch/loongarch/include/asm/paravirt.h
create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
create mode 100644 arch/loongarch/kernel/paravirt.c
base-commit: 7a396820222d6d4c02057f41658b162bdcdadd0e
--
2.39.3
This patch adds a paravirt interface for the guest kernel. Function
pv_guest_init first checks whether the system runs in VM mode. If the kernel
runs in VM mode, it will call function kvm_para_available to detect
whether the current VMM is the KVM hypervisor. The paravirt functions can work
only if the current VMM is the KVM hypervisor, since only the KVM hypervisor
is supported on LoongArch now.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/Kconfig | 9 ++++
arch/loongarch/include/asm/kvm_para.h | 7 ++++
arch/loongarch/include/asm/paravirt.h | 27 ++++++++++++
.../include/asm/paravirt_api_clock.h | 1 +
arch/loongarch/kernel/Makefile | 1 +
arch/loongarch/kernel/paravirt.c | 41 +++++++++++++++++++
arch/loongarch/kernel/setup.c | 2 +
7 files changed, 88 insertions(+)
create mode 100644 arch/loongarch/include/asm/paravirt.h
create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
create mode 100644 arch/loongarch/kernel/paravirt.c
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 10959e6c3583..817a56dff80f 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -585,6 +585,15 @@ config CPU_HAS_PREFETCH
bool
default y
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ depends on AS_HAS_LVZ_EXTENSION
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization. However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
config ARCH_SUPPORTS_KEXEC
def_bool y
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 9425d3b7e486..41200e922a82 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
#ifndef _ASM_LOONGARCH_KVM_PARA_H
#define _ASM_LOONGARCH_KVM_PARA_H
+/*
+ * Hypcall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT 8
+#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+
/*
* LoongArch hypcall return code
*/
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index 000000000000..b64813592ba0
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_guest_init(void);
+#else
+static inline int pv_guest_init(void)
+{
+ return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index 000000000000..21d01d05791a
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kvm_para.h>
+#include <asm/paravirt.h>
+#include <linux/static_call.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+ static int hypervisor_type;
+ int config;
+
+ if (!hypervisor_type) {
+ config = read_cpucfg(CPUCFG_KVM_SIG);
+ if (!memcmp(&config, KVM_SIGNATURE, 4))
+ hypervisor_type = HYPERVISOR_KVM;
+ }
+
+ return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_guest_init(void)
+{
+ if (!cpu_has_hypervisor)
+ return 0;
+ if (!kvm_para_available())
+ return 0;
+
+ return 1;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index edf2bba80130..de5c36dccc49 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -43,6 +43,7 @@
#include <asm/efi.h>
#include <asm/loongson.h>
#include <asm/numa.h>
+#include <asm/paravirt.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -367,6 +368,7 @@ void __init platform_init(void)
pr_info("The BIOS Version: %s\n", b_info.bios_version);
efi_runtime_init();
+ pv_guest_init();
}
static void __init check_kernel_sections_mem(void)
--
2.39.3
Physical cpuid is used for irq routing for irqchips such as the ipi/msi/
extioi interrupt controllers. The physical cpuid is stored in the CSR
register LOONGARCH_CSR_CPUID, and it can not be changed once the vcpu is
created. Since different irqchips have different size definitions
for the physical cpuid, KVM uses the smallest cpuid size, from extioi, and
the max cpuid size is defined as 256.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
arch/loongarch/include/asm/kvm_vcpu.h | 1 +
arch/loongarch/kvm/vcpu.c | 93 ++++++++++++++++++++++++++-
arch/loongarch/kvm/vm.c | 11 ++++
4 files changed, 130 insertions(+), 1 deletion(-)
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..57399d7cf8b7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
#define MAX_PGTABLE_LEVELS 4
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions about physical cpuid on different hardwares.
+ * For LOONGARCH_CSR_CPUID register, max cpuid size is 512
+ * For IPI HW, max dest CPUID size is 1024
+ * For extioi interrupt controller, max dest CPUID size is 256
+ * For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID 256
+
+struct kvm_phyid_info {
+ struct kvm_vcpu *vcpu;
+ bool enabled;
+};
+
+struct kvm_phyid_map {
+ int max_phyid;
+ struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int root_level;
+ struct mutex phyid_map_lock;
+ struct kvm_phyid_map *phyid_map;
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index e71ceb88f29e..2402129ee955 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
void kvm_restore_timer(struct kvm_vcpu *vcpu);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
/*
* Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 27701991886d..97ca9c7160e6 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
return 0;
}
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+ int cpuid;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+ struct kvm_phyid_map *map;
+
+ if (val >= KVM_MAX_PHYID)
+ return -EINVAL;
+
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ map = vcpu->kvm->arch.phyid_map;
+ mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
+ if (map->phys_map[cpuid].enabled) {
+ /*
+ * Cpuid is already set before
+ * Forbid changing different cpuid at runtime
+ */
+ if (cpuid != val) {
+ /*
+ * Cpuid 0 is the initial value for a vcpu, it may be an
+ * invalid/unset value for the vcpu
+ */
+ if (cpuid) {
+ mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return -EINVAL;
+ }
+ } else {
+ /* Discard duplicated cpuid set */
+ mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+ }
+ }
+
+ if (map->phys_map[val].enabled) {
+ /*
+ * New cpuid is already set with other vcpu
+ * Forbid sharing the same cpuid between different vcpus
+ */
+ if (map->phys_map[val].vcpu != vcpu) {
+ mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return -EINVAL;
+ }
+
+ /* Discard duplicated cpuid set operation */
+ mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+ }
+
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+ map->phys_map[val].enabled = true;
+ map->phys_map[val].vcpu = vcpu;
+ if (map->max_phyid < val)
+ map->max_phyid = val;
+ mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+}
+
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
+{
+ struct kvm_phyid_map *map;
+
+ if (cpuid >= KVM_MAX_PHYID)
+ return NULL;
+
+ map = kvm->arch.phyid_map;
+ if (map->phys_map[cpuid].enabled)
+ return map->phys_map[cpuid].vcpu;
+
+ return NULL;
+}
+
+static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
+{
+ int cpuid;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+ struct kvm_phyid_map *map;
+
+ map = vcpu->kvm->arch.phyid_map;
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ if (cpuid >= KVM_MAX_PHYID)
+ return;
+
+ if (map->phys_map[cpuid].enabled) {
+ map->phys_map[cpuid].vcpu = NULL;
+ map->phys_map[cpuid].enabled = false;
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
+ }
+}
+
static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
{
int ret = 0, gintc;
@@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
return ret;
- }
+ } else if (id == LOONGARCH_CSR_CPUID)
+ return kvm_set_cpuid(vcpu, val);
kvm_write_sw_gcsr(csr, id, val);
@@ -925,6 +1015,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
hrtimer_cancel(&vcpu->arch.swtimer);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kfree(vcpu->arch.csr);
+ kvm_drop_cpuid(vcpu);
/*
* If the vCPU is freed and reused as another vCPU, we don't want the
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 0a37f6fa8f2d..6fd5916ebef3 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.pgd)
return -ENOMEM;
+ kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
+ GFP_KERNEL_ACCOUNT);
+ if (!kvm->arch.phyid_map) {
+ free_page((unsigned long)kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ return -ENOMEM;
+ }
+
kvm_init_vmcs(kvm);
kvm->arch.gpa_size = BIT(cpu_vabits - 1);
kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
@@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
for (i = 0; i <= kvm->arch.root_level; i++)
kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
+ mutex_init(&kvm->arch.phyid_map_lock);
return 0;
}
@@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_destroy_vcpus(kvm);
free_page((unsigned long)kvm->arch.pgd);
+ kvfree(kvm->arch.phyid_map);
kvm->arch.pgd = NULL;
+ kvm->arch.phyid_map = NULL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
--
2.39.3
On a LoongArch system, the ipi hw uses iocsr registers: there is one iocsr
register access on the ipi sender side and two iocsr accesses in the ipi
interrupt handler on the receiver side. In VM mode, all iocsr register
accesses will trap into the hypervisor. So with one ipi hw notification
there will be three traps.
This patch adds pv ipi support for VM: the hypercall instruction is used
by the ipi sender, and the hypervisor will inject SWI into the VM. In the SWI
interrupt handler, only the estat CSR register is written to clear the irq.
Estat CSR register access will not trap into the hypervisor. So with pv ipi
supported, the pv ipi sender will trap into the hypervisor one time, and the
pv ipi receiver will not trap, so there is only one trap in total.
This patch also adds ipi multicast support; the method is similar to that of
x86. With ipi multicast support, an ipi notification can be sent to at most
128 vcpus at one time. It greatly reduces traps into the hypervisor.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/hardirq.h | 1 +
arch/loongarch/include/asm/kvm_host.h | 1 +
arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++
arch/loongarch/include/asm/loongarch.h | 1 +
arch/loongarch/kernel/irq.c | 2 +-
arch/loongarch/kernel/paravirt.c | 113 ++++++++++++++++++++++
arch/loongarch/kernel/smp.c | 2 +-
arch/loongarch/kvm/exit.c | 73 ++++++++++++++-
arch/loongarch/kvm/vcpu.c | 1 +
9 files changed, 314 insertions(+), 4 deletions(-)
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..8a611843c1f0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+ atomic_t messages ____cacheline_aligned_in_smp;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 57399d7cf8b7..1bf927e2bfac 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+ u64 hvcl_exits;
};
#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 41200e922a82..a25a84e372b9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,6 +9,10 @@
#define HYPERVISOR_VENDOR_SHIFT 8
#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HC_CODE_SERVICE 0
+#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
+#define KVM_HC_FUNC_IPI 1
+
/*
* LoongArch hypcall return code
*/
@@ -16,6 +20,126 @@
#define KVM_HC_INVALID_CODE -1UL
#define KVM_HC_INVALID_PARAMETER -2UL
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+ unsigned long arg0, unsigned long arg1)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall4(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall5(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+ register unsigned long a5 asm("a5") = arg4;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+ : "memory"
+ );
+
+ return ret;
+}
+
+
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index a1d22e8b6f94..0ad36704cb4b 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -167,6 +167,7 @@
#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
#define KVM_SIGNATURE "KVM\0"
#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+#define KVM_FEATURE_PV_IPI BIT(1)
#ifndef __ASSEMBLY__
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 1b58f7c3eed9..b5bd298c981f 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
}
- set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+ set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
}
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 21d01d05791a..b840a004995a 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/types.h>
+#include <linux/interrupt.h>
#include <linux/jump_label.h>
#include <linux/kvm_para.h>
#include <asm/paravirt.h>
@@ -16,6 +17,104 @@ static u64 native_steal_clock(int cpu)
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+#ifdef CONFIG_SMP
+static void pv_send_ipi_single(int cpu, unsigned int action)
+{
+ unsigned int min, old;
+ unsigned long bitmap = 0;
+ irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+
+ action = BIT(action);
+ old = atomic_fetch_or(action, &info->messages);
+ if (old == 0) {
+ min = cpu_logical_map(cpu);
+ bitmap = 1;
+ kvm_hypercall3(KVM_HC_FUNC_IPI, bitmap, 0, min);
+ }
+}
+
+#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
+static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ unsigned int cpu, i, min = 0, max = 0, old;
+ __uint128_t bitmap = 0;
+ irq_cpustat_t *info;
+
+ if (cpumask_empty(mask))
+ return;
+
+ action = BIT(action);
+ for_each_cpu(i, mask) {
+ info = &per_cpu(irq_stat, i);
+ old = atomic_fetch_or(action, &info->messages);
+ if (old)
+ continue;
+
+ cpu = cpu_logical_map(i);
+ if (!bitmap) {
+ min = max = cpu;
+ } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
+ max = cpu > max ? cpu : max;
+ } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
+ bitmap <<= min - cpu;
+ min = cpu;
+ } else {
+ /*
+ * Physical cpuid is sorted in ascending order, so
+ * for the next mask calculation, send IPI here
+ * directly and skip the remaining cpus
+ */
+ kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
+ (unsigned long)(bitmap >> BITS_PER_LONG), min);
+ min = max = cpu;
+ bitmap = 0;
+ }
+ __set_bit(cpu - min, (unsigned long *)&bitmap);
+ }
+
+ if (bitmap)
+ kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
+ (unsigned long)(bitmap >> BITS_PER_LONG), min);
+}
+
+static irqreturn_t loongson_do_swi(int irq, void *dev)
+{
+ irq_cpustat_t *info;
+ long action;
+
+ clear_csr_estat(1 << INT_SWI0);
+
+ info = this_cpu_ptr(&irq_stat);
+ do {
+ action = atomic_xchg(&info->messages, 0);
+ if (action & SMP_CALL_FUNCTION) {
+ generic_smp_call_function_interrupt();
+ info->ipi_irqs[IPI_CALL_FUNCTION]++;
+ }
+
+ if (action & SMP_RESCHEDULE) {
+ scheduler_ipi();
+ info->ipi_irqs[IPI_RESCHEDULE]++;
+ }
+ } while (action);
+
+ return IRQ_HANDLED;
+}
+
+static void pv_ipi_init(void)
+{
+ int r, swi0;
+
+ swi0 = get_percpu_irq(INT_SWI0);
+ if (swi0 < 0)
+ panic("SWI0 IRQ mapping failed\n");
+ irq_set_percpu_devid(swi0);
+ r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
+ if (r < 0)
+ panic("SWI0 IRQ request failed\n");
+}
+#endif
+
static bool kvm_para_available(void)
{
static int hypervisor_type;
@@ -32,10 +131,24 @@ static bool kvm_para_available(void)
int __init pv_guest_init(void)
{
+ int feature;
+
if (!cpu_has_hypervisor)
return 0;
if (!kvm_para_available())
return 0;
+ /*
+ * check whether KVM hypervisor supports pv_ipi or not
+ */
+#ifdef CONFIG_SMP
+ feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+ if (feature & KVM_FEATURE_PV_IPI) {
+ smp_ops.call_func_single_ipi = pv_send_ipi_single;
+ smp_ops.call_func_ipi = pv_send_ipi_mask;
+ smp_ops.ipi_init = pv_ipi_init;
+ }
+#endif
+
return 1;
}
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 46735ba49815..57b5706cedb9 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
void loongson_init_secondary(void)
{
unsigned int cpu = smp_processor_id();
- unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
+ unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
change_csr_ecfg(ECFG0_IM, imask);
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index f4e4df05f578..189b70bad825 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
case CPUCFG_KVM_SIG:
vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
break;
+ case CPUCFG_KVM_FEATURE:
+ vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
+ break;
default:
vcpu->arch.gprs[rd] = 0;
break;
@@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
+{
+ unsigned long ipi_bitmap;
+ unsigned int min, cpu, i;
+ struct kvm_vcpu *dest;
+
+ min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
+ for (i = 0; i < 2; i++) {
+ ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
+ if (!ipi_bitmap)
+ continue;
+
+ cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+ while (cpu < BITS_PER_LONG) {
+ dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+ cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
+ cpu + 1);
+ if (!dest)
+ continue;
+
+ /*
+ * Send SWI0 to dest vcpu to emulate IPI interrupt
+ */
+ kvm_queue_irq(dest, INT_SWI0);
+ kvm_vcpu_kick(dest);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Hypcall emulation always returns to the guest; the caller should check retval.
+ */
+static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
+ long ret;
+
+ switch (func) {
+ case KVM_HC_FUNC_IPI:
+ kvm_pv_send_ipi(vcpu);
+ ret = KVM_HC_STATUS_SUCCESS;
+ break;
+ default:
+ ret = KVM_HC_INVALID_CODE;
+ break;
+ };
+
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
+}
+
static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
{
+ larch_inst inst;
+ unsigned int code;
+
+ inst.word = vcpu->arch.badi;
+ code = inst.reg0i15_format.immediate;
update_pc(&vcpu->arch);
- /* Treat it as noop intruction, only set return value */
- vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ switch (code) {
+ case KVM_HC_SERVICE:
+ vcpu->stat.hvcl_exits++;
+ kvm_handle_pv_hcall(vcpu);
+ break;
+ default:
+ /* Treat it as a noop instruction, only set the return value */
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ break;
+ }
+
return RESUME_GUEST;
}
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 97ca9c7160e6..80e05ba9b48d 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, idle_exits),
STATS_DESC_COUNTER(VCPU, cpucfg_exits),
STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, hvcl_exits)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
--
2.39.3
This patch refines ipi handling on the LoongArch platform; there are
three changes in this patch.
1. Add generic get_percpu_irq api, replace some percpu irq function
such as get_ipi_irq/get_pmc_irq/get_timer_irq with get_percpu_irq.
2. Change the parameter action definition for functions
loongson_send_ipi_single and loongson_send_ipi_mask. Code encoding is used
here rather than bitmap encoding for the ipi action: the ipi hw sender uses the
action code, and the ipi receiver will get the action in bitmap encoding, since
the ipi hw converts it into a bitmap in the ipi message buffer.
3. Add smp_ops on LoongArch platform so that pv ipi can be used later.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/hardirq.h | 4 ++
arch/loongarch/include/asm/irq.h | 10 ++++-
arch/loongarch/include/asm/smp.h | 31 +++++++--------
arch/loongarch/kernel/irq.c | 22 +----------
arch/loongarch/kernel/perf_event.c | 14 +------
arch/loongarch/kernel/smp.c | 58 +++++++++++++++++++---------
arch/loongarch/kernel/time.c | 12 +-----
7 files changed, 71 insertions(+), 80 deletions(-)
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
extern void ack_bad_irq(unsigned int irq);
#define ack_bad_irq ack_bad_irq
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNCTION,
+};
#define NR_IPI 2
typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
extern struct fwnode_handle *pch_lpc_handle;
extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+ struct irq_domain *d;
+
+ d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+ if (d)
+ return irq_create_mapping(d, vector);
+ return -EINVAL;
+}
#include <asm-generic/irq.h>
#endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..330f1cb3741c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
+struct smp_ops {
+ void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
+ void (*call_func_single_ipi)(int cpu, unsigned int action);
+ void (*ipi_init)(void);
+};
+
+extern struct smp_ops smp_ops;
extern int smp_num_siblings;
extern int num_processors;
extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
void loongson_boot_secondary(int cpu, struct task_struct *idle);
void loongson_init_secondary(void);
void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
#ifdef CONFIG_HOTPLUG_CPU
int loongson_cpu_disable(void);
void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
#define cpu_physical_id(cpu) cpu_logical_map(cpu)
-#define SMP_BOOT_CPU 0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION 0x4
+#define ACTTION_BOOT_CPU 0
+#define ACTTION_RESCHEDULE 1
+#define ACTTION_CALL_FUNCTION 2
+#define SMP_BOOT_CPU BIT(ACTTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION BIT(ACTTION_CALL_FUNCTION)
struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
extern asmlinkage void smpboot_entry(void);
extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void calculate_cpu_foreign_map(void);
/*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
*/
extern void show_ipi_list(struct seq_file *p, int prec);
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
- loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
- loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
#ifdef CONFIG_HOTPLUG_CPU
static inline int __cpu_disable(void)
{
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..1b58f7c3eed9 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
}
-static int __init get_ipi_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_IPI);
-
- return -EINVAL;
-}
-
void __init init_IRQ(void)
{
int i;
-#ifdef CONFIG_SMP
- int r, ipi_irq;
- static int ipi_dummy_dev;
-#endif
unsigned int order = get_order(IRQ_STACK_SIZE);
struct page *page;
@@ -113,13 +99,7 @@ void __init init_IRQ(void)
init_vec_parent_group();
irqchip_init();
#ifdef CONFIG_SMP
- ipi_irq = get_ipi_irq();
- if (ipi_irq < 0)
- panic("IPI IRQ mapping failed\n");
- irq_set_percpu_devid(ipi_irq);
- r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev);
- if (r < 0)
- panic("IPI IRQ request failed\n");
+ smp_ops.ipi_init();
#endif
for (i = 0; i < NR_IRQS; i++)
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 0491bf453cd4..3265c8f33223 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu)
static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);
-static int get_pmc_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_PCOV);
-
- return -EINVAL;
-}
-
static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);
@@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
{
if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
on_each_cpu(reset_counters, NULL, 1);
- free_irq(get_pmc_irq(), &loongarch_pmu);
+ free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
mutex_unlock(&pmu_reserve_mutex);
}
}
@@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
if (event->cpu >= 0 && !cpu_online(event->cpu))
return -ENODEV;
- irq = get_pmc_irq();
+ irq = get_percpu_irq(INT_PCOV);
flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
if (!atomic_inc_not_zero(&active_events)) {
mutex_lock(&pmu_reserve_mutex);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index a16e3dbe9f09..46735ba49815 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -66,11 +66,6 @@ static cpumask_t cpu_core_setup_map;
struct secondary_data cpuboot_data;
static DEFINE_PER_CPU(int, cpu_state);
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNCTION,
-};
-
static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNCTION] = "Function call interrupts",
@@ -123,24 +118,19 @@ static u32 ipi_read_clear(int cpu)
static void ipi_write_action(int cpu, u32 action)
{
- unsigned int irq = 0;
-
- while ((irq = ffs(action))) {
- uint32_t val = IOCSR_IPI_SEND_BLOCKING;
+ uint32_t val;
- val |= (irq - 1);
- val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
- iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
- action &= ~BIT(irq - 1);
- }
+ val = IOCSR_IPI_SEND_BLOCKING | action;
+ val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
+ iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
}
-void loongson_send_ipi_single(int cpu, unsigned int action)
+static void loongson_send_ipi_single(int cpu, unsigned int action)
{
ipi_write_action(cpu_logical_map(cpu), (u32)action);
}
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
@@ -148,6 +138,16 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
ipi_write_action(cpu_logical_map(i), (u32)action);
}
+void arch_send_call_function_single_ipi(int cpu)
+{
+ smp_ops.call_func_single_ipi(cpu, ACTTION_CALL_FUNCTION);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ smp_ops.call_func_ipi(mask, ACTTION_CALL_FUNCTION);
+}
+
/*
* This function sends a 'reschedule' IPI to another CPU.
* it goes straight through and wastes no time serializing
@@ -155,11 +155,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
*/
void arch_smp_send_reschedule(int cpu)
{
- loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
+ smp_ops.call_func_single_ipi(cpu, ACTTION_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
-irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
+static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
{
unsigned int action;
unsigned int cpu = smp_processor_id();
@@ -179,6 +179,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
+static void loongson_ipi_init(void)
+{
+ int r, ipi_irq;
+
+ ipi_irq = get_percpu_irq(INT_IPI);
+ if (ipi_irq < 0)
+ panic("IPI IRQ mapping failed\n");
+
+ irq_set_percpu_devid(ipi_irq);
+ r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat);
+ if (r < 0)
+ panic("IPI IRQ request failed\n");
+}
+
+struct smp_ops smp_ops = {
+ .call_func_single_ipi = loongson_send_ipi_single,
+ .call_func_ipi = loongson_send_ipi_mask,
+ .ipi_init = loongson_ipi_init,
+};
+
static void __init fdt_smp_setup(void)
{
#ifdef CONFIG_OF
@@ -256,7 +276,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
csr_mail_send(entry, cpu_logical_map(cpu), 0);
- loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
+ loongson_send_ipi_single(cpu, ACTTION_BOOT_CPU);
}
/*
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index e7015f7b70e3..fd5354f9be7c 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -123,16 +123,6 @@ void sync_counter(void)
csr_write64(init_offset, LOONGARCH_CSR_CNTC);
}
-static int get_timer_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_TI);
-
- return -EINVAL;
-}
-
int constant_clockevent_init(void)
{
unsigned int cpu = smp_processor_id();
@@ -142,7 +132,7 @@ int constant_clockevent_init(void)
static int irq = 0, timer_irq_installed = 0;
if (!timer_irq_installed) {
- irq = get_timer_irq();
+ irq = get_percpu_irq(INT_TI);
if (irq < 0)
pr_err("Failed to map irq %d (timer)\n", irq);
}
--
2.39.3
On LoongArch systems, the hypercall instruction is supported when the
system runs in VM mode. This patch adds a dummy function with hypercall
instruction emulation, rather than injecting an EXCCODE_INE invalid
instruction exception.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/Kbuild | 1 -
arch/loongarch/include/asm/kvm_para.h | 26 ++++++++++++++++++++++++++
arch/loongarch/include/uapi/asm/Kbuild | 2 --
arch/loongarch/kvm/exit.c | 10 ++++++++++
4 files changed, 36 insertions(+), 3 deletions(-)
create mode 100644 arch/loongarch/include/asm/kvm_para.h
delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..22991a6f0e2b 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -23,4 +23,3 @@ generic-y += poll.h
generic-y += param.h
generic-y += posix_types.h
generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index 000000000000..9425d3b7e486
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * LoongArch hypcall return code
+ */
+#define KVM_HC_STATUS_SUCCESS 0
+#define KVM_HC_INVALID_CODE -1UL
+#define KVM_HC_INVALID_PARAMETER -2UL
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild
deleted file mode 100644
index 4aa680ca2e5f..000000000000
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += kvm_para.h
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..d15c71320a11 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -685,6 +685,15 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+ update_pc(&vcpu->arch);
+
+ /* Treat it as a noop instruction, only set return value */
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ return RESUME_GUEST;
+}
+
/*
* LoongArch KVM callback handling for unimplemented guest exiting
*/
@@ -716,6 +725,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
[EXCCODE_GSPR] = kvm_handle_gspr,
+ [EXCCODE_HVC] = kvm_handle_hypcall,
};
int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
--
2.39.3
Hi, Bibo,
On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>
> This patch refines ipi handling on LoongArch platform, there are
> three changes with this patch.
> 1. Add generic get_percpu_irq api, replace some percpu irq function
> such as get_ipi_irq/get_pmc_irq/get_timer_irq with get_percpu_irq.
>
> 2. Change parameter action definition with function
> loongson_send_ipi_single and loongson_send_ipi_mask. Code encoding is used
> here rather than bitmap encoding for ipi action, ipi hw sender uses action
> code, and ipi receiver will get action bitmap encoding, the ipi hw will
> convert it into bitmap in ipi message buffer.
>
> 3. Add smp_ops on LoongArch platform so that pv ipi can be used later.
>
> Signed-off-by: Bibo Mao <[email protected]>
> ---
> arch/loongarch/include/asm/hardirq.h | 4 ++
> arch/loongarch/include/asm/irq.h | 10 ++++-
> arch/loongarch/include/asm/smp.h | 31 +++++++--------
> arch/loongarch/kernel/irq.c | 22 +----------
> arch/loongarch/kernel/perf_event.c | 14 +------
> arch/loongarch/kernel/smp.c | 58 +++++++++++++++++++---------
> arch/loongarch/kernel/time.c | 12 +-----
> 7 files changed, 71 insertions(+), 80 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
> index 0ef3b18f8980..9f0038e19c7f 100644
> --- a/arch/loongarch/include/asm/hardirq.h
> +++ b/arch/loongarch/include/asm/hardirq.h
> @@ -12,6 +12,10 @@
> extern void ack_bad_irq(unsigned int irq);
> #define ack_bad_irq ack_bad_irq
>
> +enum ipi_msg_type {
> + IPI_RESCHEDULE,
> + IPI_CALL_FUNCTION,
> +};
> #define NR_IPI 2
>
> typedef struct {
> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
> index 218b4da0ea90..00101b6d601e 100644
> --- a/arch/loongarch/include/asm/irq.h
> +++ b/arch/loongarch/include/asm/irq.h
> @@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
> extern struct fwnode_handle *pch_lpc_handle;
> extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
>
> -extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
> +static inline int get_percpu_irq(int vector)
> +{
> + struct irq_domain *d;
> +
> + d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
> + if (d)
> + return irq_create_mapping(d, vector);
>
> + return -EINVAL;
> +}
> #include <asm-generic/irq.h>
>
> #endif /* _ASM_IRQ_H */
> diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
> index f81e5f01d619..330f1cb3741c 100644
> --- a/arch/loongarch/include/asm/smp.h
> +++ b/arch/loongarch/include/asm/smp.h
> @@ -12,6 +12,13 @@
> #include <linux/threads.h>
> #include <linux/cpumask.h>
>
> +struct smp_ops {
> + void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
> + void (*call_func_single_ipi)(int cpu, unsigned int action);
To keep consistency, it is better to use call_func_ipi_single and
call_func_ipi_mask.
> + void (*ipi_init)(void);
> +};
> +
> +extern struct smp_ops smp_ops;
> extern int smp_num_siblings;
> extern int num_processors;
> extern int disabled_cpus;
> @@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
> void loongson_boot_secondary(int cpu, struct task_struct *idle);
> void loongson_init_secondary(void);
> void loongson_smp_finish(void);
> -void loongson_send_ipi_single(int cpu, unsigned int action);
> -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
> #ifdef CONFIG_HOTPLUG_CPU
> int loongson_cpu_disable(void);
> void loongson_cpu_die(unsigned int cpu);
> @@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
>
> #define cpu_physical_id(cpu) cpu_logical_map(cpu)
>
> -#define SMP_BOOT_CPU 0x1
> -#define SMP_RESCHEDULE 0x2
> -#define SMP_CALL_FUNCTION 0x4
> +#define ACTTION_BOOT_CPU 0
> +#define ACTTION_RESCHEDULE 1
> +#define ACTTION_CALL_FUNCTION 2
> +#define SMP_BOOT_CPU BIT(ACTTION_BOOT_CPU)
> +#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
> +#define SMP_CALL_FUNCTION BIT(ACTTION_CALL_FUNCTION)
>
> struct secondary_data {
> unsigned long stack;
> @@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
>
> extern asmlinkage void smpboot_entry(void);
> extern asmlinkage void start_secondary(void);
> -
> +extern void arch_send_call_function_single_ipi(int cpu);
> +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
Similarly, to keep consistency, it is better to use
arch_send_function_ipi_single and arch_send_function_ipi_mask.
Huacai
> extern void calculate_cpu_foreign_map(void);
>
> /*
> @@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
> */
> extern void show_ipi_list(struct seq_file *p, int prec);
>
> -static inline void arch_send_call_function_single_ipi(int cpu)
> -{
> - loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
> -}
> -
> -static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
> -{
> - loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
> -}
> -
> #ifdef CONFIG_HOTPLUG_CPU
> static inline int __cpu_disable(void)
> {
> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
> index 883e5066ae44..1b58f7c3eed9 100644
> --- a/arch/loongarch/kernel/irq.c
> +++ b/arch/loongarch/kernel/irq.c
> @@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
> acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
> }
>
> -static int __init get_ipi_irq(void)
> -{
> - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
> -
> - if (d)
> - return irq_create_mapping(d, INT_IPI);
> -
> - return -EINVAL;
> -}
> -
> void __init init_IRQ(void)
> {
> int i;
> -#ifdef CONFIG_SMP
> - int r, ipi_irq;
> - static int ipi_dummy_dev;
> -#endif
> unsigned int order = get_order(IRQ_STACK_SIZE);
> struct page *page;
>
> @@ -113,13 +99,7 @@ void __init init_IRQ(void)
> init_vec_parent_group();
> irqchip_init();
> #ifdef CONFIG_SMP
> - ipi_irq = get_ipi_irq();
> - if (ipi_irq < 0)
> - panic("IPI IRQ mapping failed\n");
> - irq_set_percpu_devid(ipi_irq);
> - r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev);
> - if (r < 0)
> - panic("IPI IRQ request failed\n");
> + smp_ops.ipi_init();
> #endif
>
> for (i = 0; i < NR_IRQS; i++)
> diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
> index 0491bf453cd4..3265c8f33223 100644
> --- a/arch/loongarch/kernel/perf_event.c
> +++ b/arch/loongarch/kernel/perf_event.c
> @@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu)
> static DEFINE_MUTEX(pmu_reserve_mutex);
> static atomic_t active_events = ATOMIC_INIT(0);
>
> -static int get_pmc_irq(void)
> -{
> - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
> -
> - if (d)
> - return irq_create_mapping(d, INT_PCOV);
> -
> - return -EINVAL;
> -}
> -
> static void reset_counters(void *arg);
> static int __hw_perf_event_init(struct perf_event *event);
>
> @@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
> {
> if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
> on_each_cpu(reset_counters, NULL, 1);
> - free_irq(get_pmc_irq(), &loongarch_pmu);
> + free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
> mutex_unlock(&pmu_reserve_mutex);
> }
> }
> @@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
> if (event->cpu >= 0 && !cpu_online(event->cpu))
> return -ENODEV;
>
> - irq = get_pmc_irq();
> + irq = get_percpu_irq(INT_PCOV);
> flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
> if (!atomic_inc_not_zero(&active_events)) {
> mutex_lock(&pmu_reserve_mutex);
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index a16e3dbe9f09..46735ba49815 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -66,11 +66,6 @@ static cpumask_t cpu_core_setup_map;
> struct secondary_data cpuboot_data;
> static DEFINE_PER_CPU(int, cpu_state);
>
> -enum ipi_msg_type {
> - IPI_RESCHEDULE,
> - IPI_CALL_FUNCTION,
> -};
> -
> static const char *ipi_types[NR_IPI] __tracepoint_string = {
> [IPI_RESCHEDULE] = "Rescheduling interrupts",
> [IPI_CALL_FUNCTION] = "Function call interrupts",
> @@ -123,24 +118,19 @@ static u32 ipi_read_clear(int cpu)
>
> static void ipi_write_action(int cpu, u32 action)
> {
> - unsigned int irq = 0;
> -
> - while ((irq = ffs(action))) {
> - uint32_t val = IOCSR_IPI_SEND_BLOCKING;
> + uint32_t val;
>
> - val |= (irq - 1);
> - val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
> - iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
> - action &= ~BIT(irq - 1);
> - }
> + val = IOCSR_IPI_SEND_BLOCKING | action;
> + val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
> + iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
> }
>
> -void loongson_send_ipi_single(int cpu, unsigned int action)
> +static void loongson_send_ipi_single(int cpu, unsigned int action)
> {
> ipi_write_action(cpu_logical_map(cpu), (u32)action);
> }
>
> -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> +static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> {
> unsigned int i;
>
> @@ -148,6 +138,16 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> ipi_write_action(cpu_logical_map(i), (u32)action);
> }
>
> +void arch_send_call_function_single_ipi(int cpu)
> +{
> + smp_ops.call_func_single_ipi(cpu, ACTTION_CALL_FUNCTION);
> +}
> +
> +void arch_send_call_function_ipi_mask(const struct cpumask *mask)
> +{
> + smp_ops.call_func_ipi(mask, ACTTION_CALL_FUNCTION);
> +}
> +
> /*
> * This function sends a 'reschedule' IPI to another CPU.
> * it goes straight through and wastes no time serializing
> @@ -155,11 +155,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> */
> void arch_smp_send_reschedule(int cpu)
> {
> - loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
> + smp_ops.call_func_single_ipi(cpu, ACTTION_RESCHEDULE);
> }
> EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
>
> -irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
> +static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
> {
> unsigned int action;
> unsigned int cpu = smp_processor_id();
> @@ -179,6 +179,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
> return IRQ_HANDLED;
> }
>
> +static void loongson_ipi_init(void)
> +{
> + int r, ipi_irq;
> +
> + ipi_irq = get_percpu_irq(INT_IPI);
> + if (ipi_irq < 0)
> + panic("IPI IRQ mapping failed\n");
> +
> + irq_set_percpu_devid(ipi_irq);
> + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat);
> + if (r < 0)
> + panic("IPI IRQ request failed\n");
> +}
> +
> +struct smp_ops smp_ops = {
> + .call_func_single_ipi = loongson_send_ipi_single,
> + .call_func_ipi = loongson_send_ipi_mask,
> + .ipi_init = loongson_ipi_init,
> +};
> +
> static void __init fdt_smp_setup(void)
> {
> #ifdef CONFIG_OF
> @@ -256,7 +276,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>
> csr_mail_send(entry, cpu_logical_map(cpu), 0);
>
> - loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
> + loongson_send_ipi_single(cpu, ACTTION_BOOT_CPU);
> }
>
> /*
> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
> index e7015f7b70e3..fd5354f9be7c 100644
> --- a/arch/loongarch/kernel/time.c
> +++ b/arch/loongarch/kernel/time.c
> @@ -123,16 +123,6 @@ void sync_counter(void)
> csr_write64(init_offset, LOONGARCH_CSR_CNTC);
> }
>
> -static int get_timer_irq(void)
> -{
> - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
> -
> - if (d)
> - return irq_create_mapping(d, INT_TI);
> -
> - return -EINVAL;
> -}
> -
> int constant_clockevent_init(void)
> {
> unsigned int cpu = smp_processor_id();
> @@ -142,7 +132,7 @@ int constant_clockevent_init(void)
> static int irq = 0, timer_irq_installed = 0;
>
> if (!timer_irq_installed) {
> - irq = get_timer_irq();
> + irq = get_percpu_irq(INT_TI);
> if (irq < 0)
> pr_err("Failed to map irq %d (timer)\n", irq);
> }
> --
> 2.39.3
>
Hi, Bibo,
On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>
> On LoongArch system, ipi hw uses iocsr registers, there is one iocsr
> register access on ipi sender and two iocsr access on ipi receiver
> which is ipi interrupt handler. On VM mode all iocsr registers
> accessing will trap into hypervisor. So with one ipi hw notification
> there will be three times of trap.
>
> This patch adds pv ipi support for VM, hypercall instruction is used
> to ipi sender, and hypervisor will inject SWI on the VM. During SWI
> interrupt handler, only estat CSR register is written to clear irq.
> Estat CSR register access will not trap into hypervisor. So with pv ipi
> supported, pv ipi sender will trap into hypervsor one time, pv ipi
> revicer will not trap, there is only one time of trap.
>
> Also this patch adds ipi multicast support, the method is similar with
> x86. With ipi multicast support, ipi notification can be sent to at most
> 128 vcpus at one time. It reduces trap into hypervisor greatly.
>
> Signed-off-by: Bibo Mao <[email protected]>
> ---
> arch/loongarch/include/asm/hardirq.h | 1 +
> arch/loongarch/include/asm/kvm_host.h | 1 +
> arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++
> arch/loongarch/include/asm/loongarch.h | 1 +
> arch/loongarch/kernel/irq.c | 2 +-
> arch/loongarch/kernel/paravirt.c | 113 ++++++++++++++++++++++
> arch/loongarch/kernel/smp.c | 2 +-
> arch/loongarch/kvm/exit.c | 73 ++++++++++++++-
> arch/loongarch/kvm/vcpu.c | 1 +
> 9 files changed, 314 insertions(+), 4 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
> index 9f0038e19c7f..8a611843c1f0 100644
> --- a/arch/loongarch/include/asm/hardirq.h
> +++ b/arch/loongarch/include/asm/hardirq.h
> @@ -21,6 +21,7 @@ enum ipi_msg_type {
> typedef struct {
> unsigned int ipi_irqs[NR_IPI];
> unsigned int __softirq_pending;
> + atomic_t messages ____cacheline_aligned_in_smp;
> } ____cacheline_aligned irq_cpustat_t;
>
> DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index 57399d7cf8b7..1bf927e2bfac 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
> u64 idle_exits;
> u64 cpucfg_exits;
> u64 signal_exits;
> + u64 hvcl_exits;
> };
>
> #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
> index 41200e922a82..a25a84e372b9 100644
> --- a/arch/loongarch/include/asm/kvm_para.h
> +++ b/arch/loongarch/include/asm/kvm_para.h
> @@ -9,6 +9,10 @@
> #define HYPERVISOR_VENDOR_SHIFT 8
> #define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
>
> +#define KVM_HC_CODE_SERVICE 0
> +#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
> +#define KVM_HC_FUNC_IPI 1
> +
> /*
> * LoongArch hypcall return code
> */
> @@ -16,6 +20,126 @@
> #define KVM_HC_INVALID_CODE -1UL
> #define KVM_HC_INVALID_PARAMETER -2UL
>
> +/*
> + * Hypercalls interface for KVM hypervisor
> + *
> + * a0: function identifier
> + * a1-a6: args
> + * Return value will be placed in v0.
> + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
> + */
> +static __always_inline long kvm_hypercall(u64 fid)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HC_SERVICE)
> + : "=r" (ret)
> + : "r" (fun)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HC_SERVICE)
> + : "=r" (ret)
> + : "r" (fun), "r" (a1)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall2(u64 fid,
> + unsigned long arg0, unsigned long arg1)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HC_SERVICE)
> + : "=r" (ret)
> + : "r" (fun), "r" (a1), "r" (a2)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall3(u64 fid,
> + unsigned long arg0, unsigned long arg1, unsigned long arg2)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> + register unsigned long a3 asm("a3") = arg2;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HC_SERVICE)
> + : "=r" (ret)
> + : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall4(u64 fid,
> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
> + unsigned long arg3)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> + register unsigned long a3 asm("a3") = arg2;
> + register unsigned long a4 asm("a4") = arg3;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HC_SERVICE)
> + : "=r" (ret)
> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +static __always_inline long kvm_hypercall5(u64 fid,
> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
> + unsigned long arg3, unsigned long arg4)
> +{
> + register long ret asm("v0");
> + register unsigned long fun asm("a0") = fid;
> + register unsigned long a1 asm("a1") = arg0;
> + register unsigned long a2 asm("a2") = arg1;
> + register unsigned long a3 asm("a3") = arg2;
> + register unsigned long a4 asm("a4") = arg3;
> + register unsigned long a5 asm("a5") = arg4;
> +
> + __asm__ __volatile__(
> + "hvcl "__stringify(KVM_HC_SERVICE)
> + : "=r" (ret)
> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
> + : "memory"
> + );
> +
> + return ret;
> +}
> +
> +
> static inline unsigned int kvm_arch_para_features(void)
> {
> return 0;
> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> index a1d22e8b6f94..0ad36704cb4b 100644
> --- a/arch/loongarch/include/asm/loongarch.h
> +++ b/arch/loongarch/include/asm/loongarch.h
> @@ -167,6 +167,7 @@
> #define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
> #define KVM_SIGNATURE "KVM\0"
> #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
> +#define KVM_FEATURE_PV_IPI BIT(1)
>
> #ifndef __ASSEMBLY__
>
> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
> index 1b58f7c3eed9..b5bd298c981f 100644
> --- a/arch/loongarch/kernel/irq.c
> +++ b/arch/loongarch/kernel/irq.c
> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
> per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
> }
>
> - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> }
> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
> index 21d01d05791a..b840a004995a 100644
> --- a/arch/loongarch/kernel/paravirt.c
> +++ b/arch/loongarch/kernel/paravirt.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/export.h>
> #include <linux/types.h>
> +#include <linux/interrupt.h>
> #include <linux/jump_label.h>
> #include <linux/kvm_para.h>
> #include <asm/paravirt.h>
> @@ -16,6 +17,104 @@ static u64 native_steal_clock(int cpu)
>
> DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>
> +#ifdef CONFIG_SMP
> +static void pv_send_ipi_single(int cpu, unsigned int action)
> +{
> + unsigned int min, old;
> + unsigned long bitmap = 0;
> + irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
> +
> + action = BIT(action);
> + old = atomic_fetch_or(action, &info->messages);
> + if (old == 0) {
> + min = cpu_logical_map(cpu);
> + bitmap = 1;
> + kvm_hypercall3(KVM_HC_FUNC_IPI, bitmap, 0, min);
> + }
> +}
> +
> +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> +{
> + unsigned int cpu, i, min = 0, max = 0, old;
> + __uint128_t bitmap = 0;
> + irq_cpustat_t *info;
> +
> + if (cpumask_empty(mask))
> + return;
> +
> + action = BIT(action);
> + for_each_cpu(i, mask) {
> + info = &per_cpu(irq_stat, i);
> + old = atomic_fetch_or(action, &info->messages);
> + if (old)
> + continue;
> +
> + cpu = cpu_logical_map(i);
> + if (!bitmap) {
> + min = max = cpu;
> + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
> + max = cpu > max ? cpu : max;
> + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
> + bitmap <<= min - cpu;
> + min = cpu;
> + } else {
> + /*
> + * Physical cpuid is sorted in ascending order ascend
> + * for the next mask calculation, send IPI here
> + * directly and skip the remainding cpus
> + */
> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
> + min = max = cpu;
> + bitmap = 0;
> + }
> + __set_bit(cpu - min, (unsigned long *)&bitmap);
> + }
> +
> + if (bitmap)
> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
> +}
> +
> +static irqreturn_t loongson_do_swi(int irq, void *dev)
> +{
> + irq_cpustat_t *info;
> + long action;
> +
> + clear_csr_estat(1 << INT_SWI0);
> +
> + info = this_cpu_ptr(&irq_stat);
> + do {
> + action = atomic_xchg(&info->messages, 0);
> + if (action & SMP_CALL_FUNCTION) {
> + generic_smp_call_function_interrupt();
> + info->ipi_irqs[IPI_CALL_FUNCTION]++;
> + }
> +
> + if (action & SMP_RESCHEDULE) {
> + scheduler_ipi();
> + info->ipi_irqs[IPI_RESCHEDULE]++;
> + }
> + } while (action);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static void pv_ipi_init(void)
> +{
> + int r, swi0;
> +
> + swi0 = get_percpu_irq(INT_SWI0);
> + if (swi0 < 0)
> + panic("SWI0 IRQ mapping failed\n");
> + irq_set_percpu_devid(swi0);
> + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
> + if (r < 0)
> + panic("SWI0 IRQ request failed\n");
> +}
> +#endif
> +
> static bool kvm_para_available(void)
> {
> static int hypervisor_type;
> @@ -32,10 +131,24 @@ static bool kvm_para_available(void)
>
> int __init pv_guest_init(void)
> {
> + int feature;
> +
> if (!cpu_has_hypervisor)
> return 0;
> if (!kvm_para_available())
> return 0;
>
> + /*
> + * check whether KVM hypervisor supports pv_ipi or not
> + */
> +#ifdef CONFIG_SMP
> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
> + if (feature & KVM_FEATURE_PV_IPI) {
> + smp_ops.call_func_single_ipi = pv_send_ipi_single;
> + smp_ops.call_func_ipi = pv_send_ipi_mask;
From this patch I found that these functions are supposed to send any
type of IPI, yet the naming is call_func_xxx. Maybe send_ipi_single
and send_ipi_mask are more accurate.
Huacai
> + smp_ops.ipi_init = pv_ipi_init;
> + }
> +#endif
> +
> return 1;
> }
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index 46735ba49815..57b5706cedb9 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
> void loongson_init_secondary(void)
> {
> unsigned int cpu = smp_processor_id();
> - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>
> change_csr_ecfg(ECFG0_IM, imask);
> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> index f4e4df05f578..189b70bad825 100644
> --- a/arch/loongarch/kvm/exit.c
> +++ b/arch/loongarch/kvm/exit.c
> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
> case CPUCFG_KVM_SIG:
> vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
> break;
> + case CPUCFG_KVM_FEATURE:
> + vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
> + break;
> default:
> vcpu->arch.gprs[rd] = 0;
> break;
> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
> return RESUME_GUEST;
> }
>
> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
> +{
> + unsigned long ipi_bitmap;
> + unsigned int min, cpu, i;
> + struct kvm_vcpu *dest;
> +
> + min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
> + for (i = 0; i < 2; i++) {
> + ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
> + if (!ipi_bitmap)
> + continue;
> +
> + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
> + while (cpu < BITS_PER_LONG) {
> + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
> + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
> + cpu + 1);
> + if (!dest)
> + continue;
> +
> + /*
> + * Send SWI0 to dest vcpu to emulate IPI interrupt
> + */
> + kvm_queue_irq(dest, INT_SWI0);
> + kvm_vcpu_kick(dest);
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * hypcall emulation always return to guest, Caller should check retval.
> + */
> +static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
> +{
> + unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
> + long ret;
> +
> + switch (func) {
> + case KVM_HC_FUNC_IPI:
> + kvm_pv_send_ipi(vcpu);
> + ret = KVM_HC_STATUS_SUCCESS;
> + break;
> + default:
> + ret = KVM_HC_INVALID_CODE;
> + break;
> + };
> +
> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
> +}
> +
> static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
> {
> + larch_inst inst;
> + unsigned int code;
> +
> + inst.word = vcpu->arch.badi;
> + code = inst.reg0i15_format.immediate;
> update_pc(&vcpu->arch);
>
> - /* Treat it as noop intruction, only set return value */
> - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
> + switch (code) {
> + case KVM_HC_SERVICE:
> + vcpu->stat.hvcl_exits++;
> + kvm_handle_pv_hcall(vcpu);
> + break;
> + default:
> + /* Treat it as noop intruction, only set return value */
> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
> + break;
> + }
> +
> return RESUME_GUEST;
> }
>
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 97ca9c7160e6..80e05ba9b48d 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
> STATS_DESC_COUNTER(VCPU, idle_exits),
> STATS_DESC_COUNTER(VCPU, cpucfg_exits),
> STATS_DESC_COUNTER(VCPU, signal_exits),
> + STATS_DESC_COUNTER(VCPU, hvcl_exits)
> };
>
> const struct kvm_stats_header kvm_vcpu_stats_header = {
> --
> 2.39.3
>
>
Hi, Bibo,
Without this patch I can also create an SMP VM, so what problem does
this patch want to solve?
Huacai
On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>
> Physical cpuid is used to irq routing for irqchips such as ipi/msi/
> extioi interrupt controller. And physical cpuid is stored at CSR
> register LOONGARCH_CSR_CPUID, it can not be changed once vcpu is
> created. Since different irqchips have different size definition
> about physical cpuid, KVM uses the smallest cpuid from extioi, and
> the max cpuid size is defines as 256.
>
> Signed-off-by: Bibo Mao <[email protected]>
> ---
> arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
> arch/loongarch/include/asm/kvm_vcpu.h | 1 +
> arch/loongarch/kvm/vcpu.c | 93 ++++++++++++++++++++++++++-
> arch/loongarch/kvm/vm.c | 11 ++++
> 4 files changed, 130 insertions(+), 1 deletion(-)
>
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index 2d62f7b0d377..57399d7cf8b7 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>
> #define MAX_PGTABLE_LEVELS 4
>
> +/*
> + * Physical cpu id is used for interrupt routing, there are different
> + * definitions about physical cpuid on different hardwares.
> + * For LOONGARCH_CSR_CPUID register, max cpuid size if 512
> + * For IPI HW, max dest CPUID size 1024
> + * For extioi interrupt controller, max dest CPUID size is 256
> + * For MSI interrupt controller, max supported CPUID size is 65536
> + *
> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
> + * it will be expanded to 4096, including 16 packages at most. And every
> + * package supports at most 256 vcpus
> + */
> +#define KVM_MAX_PHYID 256
> +
> +struct kvm_phyid_info {
> + struct kvm_vcpu *vcpu;
> + bool enabled;
> +};
> +
> +struct kvm_phyid_map {
> + int max_phyid;
> + struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
> +};
> +
> struct kvm_arch {
> /* Guest physical mm */
> kvm_pte_t *pgd;
> @@ -71,6 +95,8 @@ struct kvm_arch {
> unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
> unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
> unsigned int root_level;
> + struct mutex phyid_map_lock;
> + struct kvm_phyid_map *phyid_map;
>
> s64 time_offset;
> struct kvm_context __percpu *vmcs;
> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
> index e71ceb88f29e..2402129ee955 100644
> --- a/arch/loongarch/include/asm/kvm_vcpu.h
> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
> void kvm_restore_timer(struct kvm_vcpu *vcpu);
>
> int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>
> /*
> * Loongarch KVM guest interrupt handling
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 27701991886d..97ca9c7160e6 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
> return 0;
> }
>
> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
> +{
> + int cpuid;
> + struct loongarch_csrs *csr = vcpu->arch.csr;
> + struct kvm_phyid_map *map;
> +
> + if (val >= KVM_MAX_PHYID)
> + return -EINVAL;
> +
> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
> + map = vcpu->kvm->arch.phyid_map;
> + mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
> + if (map->phys_map[cpuid].enabled) {
> + /*
> + * Cpuid is already set before
> + * Forbid changing different cpuid at runtime
> + */
> + if (cpuid != val) {
> + /*
> + * Cpuid 0 is initial value for vcpu, maybe invalid
> + * unset value for vcpu
> + */
> + if (cpuid) {
> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return -EINVAL;
> + }
> + } else {
> + /* Discard duplicated cpuid set */
> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return 0;
> + }
> + }
> +
> + if (map->phys_map[val].enabled) {
> + /*
> + * New cpuid is already set with other vcpu
> + * Forbid sharing the same cpuid between different vcpus
> + */
> + if (map->phys_map[val].vcpu != vcpu) {
> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return -EINVAL;
> + }
> +
> + /* Discard duplicated cpuid set operation*/
> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return 0;
> + }
> +
> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
> + map->phys_map[val].enabled = true;
> + map->phys_map[val].vcpu = vcpu;
> + if (map->max_phyid < val)
> + map->max_phyid = val;
> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
> + return 0;
> +}
> +
> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
> +{
> + struct kvm_phyid_map *map;
> +
> + if (cpuid >= KVM_MAX_PHYID)
> + return NULL;
> +
> + map = kvm->arch.phyid_map;
> + if (map->phys_map[cpuid].enabled)
> + return map->phys_map[cpuid].vcpu;
> +
> + return NULL;
> +}
> +
> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
> +{
> + int cpuid;
> + struct loongarch_csrs *csr = vcpu->arch.csr;
> + struct kvm_phyid_map *map;
> +
> + map = vcpu->kvm->arch.phyid_map;
> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
> + if (cpuid >= KVM_MAX_PHYID)
> + return;
> +
> + if (map->phys_map[cpuid].enabled) {
> + map->phys_map[cpuid].vcpu = NULL;
> + map->phys_map[cpuid].enabled = false;
> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
> + }
> +}
> +
> static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
> {
> int ret = 0, gintc;
> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
> kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>
> return ret;
> - }
> + } else if (id == LOONGARCH_CSR_CPUID)
> + return kvm_set_cpuid(vcpu, val);
>
> kvm_write_sw_gcsr(csr, id, val);
>
> @@ -925,6 +1015,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> hrtimer_cancel(&vcpu->arch.swtimer);
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
> kfree(vcpu->arch.csr);
> + kvm_drop_cpuid(vcpu);
>
> /*
> * If the vCPU is freed and reused as another vCPU, we don't want the
> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
> index 0a37f6fa8f2d..6fd5916ebef3 100644
> --- a/arch/loongarch/kvm/vm.c
> +++ b/arch/loongarch/kvm/vm.c
> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> if (!kvm->arch.pgd)
> return -ENOMEM;
>
> + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
> + GFP_KERNEL_ACCOUNT);
> + if (!kvm->arch.phyid_map) {
> + free_page((unsigned long)kvm->arch.pgd);
> + kvm->arch.pgd = NULL;
> + return -ENOMEM;
> + }
> +
> kvm_init_vmcs(kvm);
> kvm->arch.gpa_size = BIT(cpu_vabits - 1);
> kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> for (i = 0; i <= kvm->arch.root_level; i++)
> kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>
> + mutex_init(&kvm->arch.phyid_map_lock);
> return 0;
> }
>
> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
> {
> kvm_destroy_vcpus(kvm);
> free_page((unsigned long)kvm->arch.pgd);
> + kvfree(kvm->arch.phyid_map);
> kvm->arch.pgd = NULL;
> + kvm->arch.phyid_map = NULL;
> }
>
> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> --
> 2.39.3
>
On 2024/1/29 下午8:38, Huacai Chen wrote:
> Hi, Bibo,
>
> On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>>
>> This patch refines ipi handling on LoongArch platform, there are
>> three changes with this patch.
>> 1. Add generic get_percpu_irq api, replace some percpu irq function
>> such as get_ipi_irq/get_pmc_irq/get_timer_irq with get_percpu_irq.
>>
>> 2. Change parameter action definition with function
>> loongson_send_ipi_single and loongson_send_ipi_mask. Code encoding is used
>> here rather than bitmap encoding for ipi action, ipi hw sender uses action
>> code, and ipi receiver will get action bitmap encoding, the ipi hw will
>> convert it into bitmap in ipi message buffer.
>>
>> 3. Add smp_ops on LoongArch platform so that pv ipi can be used later.
>>
>> Signed-off-by: Bibo Mao <[email protected]>
>> ---
>> arch/loongarch/include/asm/hardirq.h | 4 ++
>> arch/loongarch/include/asm/irq.h | 10 ++++-
>> arch/loongarch/include/asm/smp.h | 31 +++++++--------
>> arch/loongarch/kernel/irq.c | 22 +----------
>> arch/loongarch/kernel/perf_event.c | 14 +------
>> arch/loongarch/kernel/smp.c | 58 +++++++++++++++++++---------
>> arch/loongarch/kernel/time.c | 12 +-----
>> 7 files changed, 71 insertions(+), 80 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
>> index 0ef3b18f8980..9f0038e19c7f 100644
>> --- a/arch/loongarch/include/asm/hardirq.h
>> +++ b/arch/loongarch/include/asm/hardirq.h
>> @@ -12,6 +12,10 @@
>> extern void ack_bad_irq(unsigned int irq);
>> #define ack_bad_irq ack_bad_irq
>>
>> +enum ipi_msg_type {
>> + IPI_RESCHEDULE,
>> + IPI_CALL_FUNCTION,
>> +};
>> #define NR_IPI 2
>>
>> typedef struct {
>> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
>> index 218b4da0ea90..00101b6d601e 100644
>> --- a/arch/loongarch/include/asm/irq.h
>> +++ b/arch/loongarch/include/asm/irq.h
>> @@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
>> extern struct fwnode_handle *pch_lpc_handle;
>> extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
>>
>> -extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
>> +static inline int get_percpu_irq(int vector)
>> +{
>> + struct irq_domain *d;
>> +
>> + d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
>> + if (d)
>> + return irq_create_mapping(d, vector);
>>
>> + return -EINVAL;
>> +}
>> #include <asm-generic/irq.h>
>>
>> #endif /* _ASM_IRQ_H */
>> diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
>> index f81e5f01d619..330f1cb3741c 100644
>> --- a/arch/loongarch/include/asm/smp.h
>> +++ b/arch/loongarch/include/asm/smp.h
>> @@ -12,6 +12,13 @@
>> #include <linux/threads.h>
>> #include <linux/cpumask.h>
>>
>> +struct smp_ops {
>> + void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
>> + void (*call_func_single_ipi)(int cpu, unsigned int action);
> To keep consistency, it is better to use call_func_ipi_single and
> call_func_ipi_mask.
Yes, how about using send_ipi_single/send_ipi_mask here? Since both
functions arch_smp_send_reschedule() and
arch_send_call_function_single_ipi() use smp_ops.
>
>> + void (*ipi_init)(void);
>> +};
>> +
>> +extern struct smp_ops smp_ops;
>> extern int smp_num_siblings;
>> extern int num_processors;
>> extern int disabled_cpus;
>> @@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
>> void loongson_boot_secondary(int cpu, struct task_struct *idle);
>> void loongson_init_secondary(void);
>> void loongson_smp_finish(void);
>> -void loongson_send_ipi_single(int cpu, unsigned int action);
>> -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
>> #ifdef CONFIG_HOTPLUG_CPU
>> int loongson_cpu_disable(void);
>> void loongson_cpu_die(unsigned int cpu);
>> @@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
>>
>> #define cpu_physical_id(cpu) cpu_logical_map(cpu)
>>
>> -#define SMP_BOOT_CPU 0x1
>> -#define SMP_RESCHEDULE 0x2
>> -#define SMP_CALL_FUNCTION 0x4
>> +#define ACTTION_BOOT_CPU 0
>> +#define ACTTION_RESCHEDULE 1
>> +#define ACTTION_CALL_FUNCTION 2
>> +#define SMP_BOOT_CPU BIT(ACTTION_BOOT_CPU)
>> +#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
>> +#define SMP_CALL_FUNCTION BIT(ACTTION_CALL_FUNCTION)
>>
>> struct secondary_data {
>> unsigned long stack;
>> @@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
>>
>> extern asmlinkage void smpboot_entry(void);
>> extern asmlinkage void start_secondary(void);
>> -
>> +extern void arch_send_call_function_single_ipi(int cpu);
>> +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
> Similarly, to keep consistency, it is better to use
> arch_send_function_ipi_single and arch_send_function_ipi_mask.
These two functions are used by all architectures and called in common
code by send_call_function_single_ipi(). They are the same as the
removed static inline functions below:
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
- loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct
cpumask *mask)
-{
- loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
Regards
Bibo Mao
>
> Huacai
>
>> extern void calculate_cpu_foreign_map(void);
>>
>> /*
>> @@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
>> */
>> extern void show_ipi_list(struct seq_file *p, int prec);
>>
>> -static inline void arch_send_call_function_single_ipi(int cpu)
>> -{
>> - loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
>> -}
>> -
>> -static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
>> -{
>> - loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
>> -}
>> -
>> #ifdef CONFIG_HOTPLUG_CPU
>> static inline int __cpu_disable(void)
>> {
>> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
>> index 883e5066ae44..1b58f7c3eed9 100644
>> --- a/arch/loongarch/kernel/irq.c
>> +++ b/arch/loongarch/kernel/irq.c
>> @@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
>> acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
>> }
>>
>> -static int __init get_ipi_irq(void)
>> -{
>> - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
>> -
>> - if (d)
>> - return irq_create_mapping(d, INT_IPI);
>> -
>> - return -EINVAL;
>> -}
>> -
>> void __init init_IRQ(void)
>> {
>> int i;
>> -#ifdef CONFIG_SMP
>> - int r, ipi_irq;
>> - static int ipi_dummy_dev;
>> -#endif
>> unsigned int order = get_order(IRQ_STACK_SIZE);
>> struct page *page;
>>
>> @@ -113,13 +99,7 @@ void __init init_IRQ(void)
>> init_vec_parent_group();
>> irqchip_init();
>> #ifdef CONFIG_SMP
>> - ipi_irq = get_ipi_irq();
>> - if (ipi_irq < 0)
>> - panic("IPI IRQ mapping failed\n");
>> - irq_set_percpu_devid(ipi_irq);
>> - r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev);
>> - if (r < 0)
>> - panic("IPI IRQ request failed\n");
>> + smp_ops.ipi_init();
>> #endif
>>
>> for (i = 0; i < NR_IRQS; i++)
>> diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
>> index 0491bf453cd4..3265c8f33223 100644
>> --- a/arch/loongarch/kernel/perf_event.c
>> +++ b/arch/loongarch/kernel/perf_event.c
>> @@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu)
>> static DEFINE_MUTEX(pmu_reserve_mutex);
>> static atomic_t active_events = ATOMIC_INIT(0);
>>
>> -static int get_pmc_irq(void)
>> -{
>> - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
>> -
>> - if (d)
>> - return irq_create_mapping(d, INT_PCOV);
>> -
>> - return -EINVAL;
>> -}
>> -
>> static void reset_counters(void *arg);
>> static int __hw_perf_event_init(struct perf_event *event);
>>
>> @@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
>> {
>> if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
>> on_each_cpu(reset_counters, NULL, 1);
>> - free_irq(get_pmc_irq(), &loongarch_pmu);
>> + free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
>> mutex_unlock(&pmu_reserve_mutex);
>> }
>> }
>> @@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
>> if (event->cpu >= 0 && !cpu_online(event->cpu))
>> return -ENODEV;
>>
>> - irq = get_pmc_irq();
>> + irq = get_percpu_irq(INT_PCOV);
>> flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
>> if (!atomic_inc_not_zero(&active_events)) {
>> mutex_lock(&pmu_reserve_mutex);
>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>> index a16e3dbe9f09..46735ba49815 100644
>> --- a/arch/loongarch/kernel/smp.c
>> +++ b/arch/loongarch/kernel/smp.c
>> @@ -66,11 +66,6 @@ static cpumask_t cpu_core_setup_map;
>> struct secondary_data cpuboot_data;
>> static DEFINE_PER_CPU(int, cpu_state);
>>
>> -enum ipi_msg_type {
>> - IPI_RESCHEDULE,
>> - IPI_CALL_FUNCTION,
>> -};
>> -
>> static const char *ipi_types[NR_IPI] __tracepoint_string = {
>> [IPI_RESCHEDULE] = "Rescheduling interrupts",
>> [IPI_CALL_FUNCTION] = "Function call interrupts",
>> @@ -123,24 +118,19 @@ static u32 ipi_read_clear(int cpu)
>>
>> static void ipi_write_action(int cpu, u32 action)
>> {
>> - unsigned int irq = 0;
>> -
>> - while ((irq = ffs(action))) {
>> - uint32_t val = IOCSR_IPI_SEND_BLOCKING;
>> + uint32_t val;
>>
>> - val |= (irq - 1);
>> - val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
>> - iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
>> - action &= ~BIT(irq - 1);
>> - }
>> + val = IOCSR_IPI_SEND_BLOCKING | action;
>> + val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
>> + iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
>> }
>>
>> -void loongson_send_ipi_single(int cpu, unsigned int action)
>> +static void loongson_send_ipi_single(int cpu, unsigned int action)
>> {
>> ipi_write_action(cpu_logical_map(cpu), (u32)action);
>> }
>>
>> -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> +static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> {
>> unsigned int i;
>>
>> @@ -148,6 +138,16 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> ipi_write_action(cpu_logical_map(i), (u32)action);
>> }
>>
>> +void arch_send_call_function_single_ipi(int cpu)
>> +{
>> + smp_ops.call_func_single_ipi(cpu, ACTTION_CALL_FUNCTION);
>> +}
>> +
>> +void arch_send_call_function_ipi_mask(const struct cpumask *mask)
>> +{
>> + smp_ops.call_func_ipi(mask, ACTTION_CALL_FUNCTION);
>> +}
>> +
>> /*
>> * This function sends a 'reschedule' IPI to another CPU.
>> * it goes straight through and wastes no time serializing
>> @@ -155,11 +155,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> */
>> void arch_smp_send_reschedule(int cpu)
>> {
>> - loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
>> + smp_ops.call_func_single_ipi(cpu, ACTTION_RESCHEDULE);
>> }
>> EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
>>
>> -irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
>> +static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
>> {
>> unsigned int action;
>> unsigned int cpu = smp_processor_id();
>> @@ -179,6 +179,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
>> return IRQ_HANDLED;
>> }
>>
>> +static void loongson_ipi_init(void)
>> +{
>> + int r, ipi_irq;
>> +
>> + ipi_irq = get_percpu_irq(INT_IPI);
>> + if (ipi_irq < 0)
>> + panic("IPI IRQ mapping failed\n");
>> +
>> + irq_set_percpu_devid(ipi_irq);
>> + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat);
>> + if (r < 0)
>> + panic("IPI IRQ request failed\n");
>> +}
>> +
>> +struct smp_ops smp_ops = {
>> + .call_func_single_ipi = loongson_send_ipi_single,
>> + .call_func_ipi = loongson_send_ipi_mask,
>> + .ipi_init = loongson_ipi_init,
>> +};
>> +
>> static void __init fdt_smp_setup(void)
>> {
>> #ifdef CONFIG_OF
>> @@ -256,7 +276,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>>
>> csr_mail_send(entry, cpu_logical_map(cpu), 0);
>>
>> - loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
>> + loongson_send_ipi_single(cpu, ACTTION_BOOT_CPU);
>> }
>>
>> /*
>> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
>> index e7015f7b70e3..fd5354f9be7c 100644
>> --- a/arch/loongarch/kernel/time.c
>> +++ b/arch/loongarch/kernel/time.c
>> @@ -123,16 +123,6 @@ void sync_counter(void)
>> csr_write64(init_offset, LOONGARCH_CSR_CNTC);
>> }
>>
>> -static int get_timer_irq(void)
>> -{
>> - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
>> -
>> - if (d)
>> - return irq_create_mapping(d, INT_TI);
>> -
>> - return -EINVAL;
>> -}
>> -
>> int constant_clockevent_init(void)
>> {
>> unsigned int cpu = smp_processor_id();
>> @@ -142,7 +132,7 @@ int constant_clockevent_init(void)
>> static int irq = 0, timer_irq_installed = 0;
>>
>> if (!timer_irq_installed) {
>> - irq = get_timer_irq();
>> + irq = get_percpu_irq(INT_TI);
>> if (irq < 0)
>> pr_err("Failed to map irq %d (timer)\n", irq);
>> }
>> --
>> 2.39.3
>>
On 2024/1/29 下午9:10, Huacai Chen wrote:
> Hi, Bibo,
>
> On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>>
>> On LoongArch system, ipi hw uses iocsr registers, there is one iocsr
>> register access on ipi sender and two iocsr access on ipi receiver
>> which is ipi interrupt handler. On VM mode all iocsr registers
>> accessing will trap into hypervisor. So with one ipi hw notification
>> there will be three times of trap.
>>
>> This patch adds pv ipi support for VM, hypercall instruction is used
>> to ipi sender, and hypervisor will inject SWI on the VM. During SWI
>> interrupt handler, only estat CSR register is written to clear irq.
>> Estat CSR register access will not trap into hypervisor. So with pv ipi
>> supported, pv ipi sender will trap into hypervsor one time, pv ipi
>> revicer will not trap, there is only one time of trap.
>>
>> Also this patch adds ipi multicast support, the method is similar with
>> x86. With ipi multicast support, ipi notification can be sent to at most
>> 128 vcpus at one time. It reduces trap into hypervisor greatly.
>>
>> Signed-off-by: Bibo Mao <[email protected]>
>> ---
>> arch/loongarch/include/asm/hardirq.h | 1 +
>> arch/loongarch/include/asm/kvm_host.h | 1 +
>> arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++
>> arch/loongarch/include/asm/loongarch.h | 1 +
>> arch/loongarch/kernel/irq.c | 2 +-
>> arch/loongarch/kernel/paravirt.c | 113 ++++++++++++++++++++++
>> arch/loongarch/kernel/smp.c | 2 +-
>> arch/loongarch/kvm/exit.c | 73 ++++++++++++++-
>> arch/loongarch/kvm/vcpu.c | 1 +
>> 9 files changed, 314 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
>> index 9f0038e19c7f..8a611843c1f0 100644
>> --- a/arch/loongarch/include/asm/hardirq.h
>> +++ b/arch/loongarch/include/asm/hardirq.h
>> @@ -21,6 +21,7 @@ enum ipi_msg_type {
>> typedef struct {
>> unsigned int ipi_irqs[NR_IPI];
>> unsigned int __softirq_pending;
>> + atomic_t messages ____cacheline_aligned_in_smp;
>> } ____cacheline_aligned irq_cpustat_t;
>>
>> DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index 57399d7cf8b7..1bf927e2bfac 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
>> u64 idle_exits;
>> u64 cpucfg_exits;
>> u64 signal_exits;
>> + u64 hvcl_exits;
>> };
>>
>> #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
>> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
>> index 41200e922a82..a25a84e372b9 100644
>> --- a/arch/loongarch/include/asm/kvm_para.h
>> +++ b/arch/loongarch/include/asm/kvm_para.h
>> @@ -9,6 +9,10 @@
>> #define HYPERVISOR_VENDOR_SHIFT 8
>> #define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
>>
>> +#define KVM_HC_CODE_SERVICE 0
>> +#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
>> +#define KVM_HC_FUNC_IPI 1
>> +
>> /*
>> * LoongArch hypcall return code
>> */
>> @@ -16,6 +20,126 @@
>> #define KVM_HC_INVALID_CODE -1UL
>> #define KVM_HC_INVALID_PARAMETER -2UL
>>
>> +/*
>> + * Hypercalls interface for KVM hypervisor
>> + *
>> + * a0: function identifier
>> + * a1-a6: args
>> + * Return value will be placed in v0.
>> + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
>> + */
>> +static __always_inline long kvm_hypercall(u64 fid)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall2(u64 fid,
>> + unsigned long arg0, unsigned long arg1)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1), "r" (a2)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall3(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall4(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> + unsigned long arg3)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> + register unsigned long a4 asm("a4") = arg3;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall5(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> + unsigned long arg3, unsigned long arg4)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> + register unsigned long a4 asm("a4") = arg3;
>> + register unsigned long a5 asm("a5") = arg4;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +
>> static inline unsigned int kvm_arch_para_features(void)
>> {
>> return 0;
>> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
>> index a1d22e8b6f94..0ad36704cb4b 100644
>> --- a/arch/loongarch/include/asm/loongarch.h
>> +++ b/arch/loongarch/include/asm/loongarch.h
>> @@ -167,6 +167,7 @@
>> #define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
>> #define KVM_SIGNATURE "KVM\0"
>> #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
>> +#define KVM_FEATURE_PV_IPI BIT(1)
>>
>> #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
>> index 1b58f7c3eed9..b5bd298c981f 100644
>> --- a/arch/loongarch/kernel/irq.c
>> +++ b/arch/loongarch/kernel/irq.c
>> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
>> per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
>> }
>>
>> - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> }
>> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
>> index 21d01d05791a..b840a004995a 100644
>> --- a/arch/loongarch/kernel/paravirt.c
>> +++ b/arch/loongarch/kernel/paravirt.c
>> @@ -1,6 +1,7 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/export.h>
>> #include <linux/types.h>
>> +#include <linux/interrupt.h>
>> #include <linux/jump_label.h>
>> #include <linux/kvm_para.h>
>> #include <asm/paravirt.h>
>> @@ -16,6 +17,104 @@ static u64 native_steal_clock(int cpu)
>>
>> DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>>
>> +#ifdef CONFIG_SMP
>> +static void pv_send_ipi_single(int cpu, unsigned int action)
>> +{
>> + unsigned int min, old;
>> + unsigned long bitmap = 0;
>> + irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
>> +
>> + action = BIT(action);
>> + old = atomic_fetch_or(action, &info->messages);
>> + if (old == 0) {
>> + min = cpu_logical_map(cpu);
>> + bitmap = 1;
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, bitmap, 0, min);
>> + }
>> +}
>> +
>> +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
>> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> +{
>> + unsigned int cpu, i, min = 0, max = 0, old;
>> + __uint128_t bitmap = 0;
>> + irq_cpustat_t *info;
>> +
>> + if (cpumask_empty(mask))
>> + return;
>> +
>> + action = BIT(action);
>> + for_each_cpu(i, mask) {
>> + info = &per_cpu(irq_stat, i);
>> + old = atomic_fetch_or(action, &info->messages);
>> + if (old)
>> + continue;
>> +
>> + cpu = cpu_logical_map(i);
>> + if (!bitmap) {
>> + min = max = cpu;
>> + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
>> + max = cpu > max ? cpu : max;
>> + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
>> + bitmap <<= min - cpu;
>> + min = cpu;
>> + } else {
>> + /*
>> + * Physical cpuid is sorted in ascending order; for
>> + * the next mask calculation, send IPI here
>> + * directly and skip the remaining cpus
>> + */
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
>> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> + min = max = cpu;
>> + bitmap = 0;
>> + }
>> + __set_bit(cpu - min, (unsigned long *)&bitmap);
>> + }
>> +
>> + if (bitmap)
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
>> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +}
>> +
>> +static irqreturn_t loongson_do_swi(int irq, void *dev)
>> +{
>> + irq_cpustat_t *info;
>> + long action;
>> +
>> + clear_csr_estat(1 << INT_SWI0);
>> +
>> + info = this_cpu_ptr(&irq_stat);
>> + do {
>> + action = atomic_xchg(&info->messages, 0);
>> + if (action & SMP_CALL_FUNCTION) {
>> + generic_smp_call_function_interrupt();
>> + info->ipi_irqs[IPI_CALL_FUNCTION]++;
>> + }
>> +
>> + if (action & SMP_RESCHEDULE) {
>> + scheduler_ipi();
>> + info->ipi_irqs[IPI_RESCHEDULE]++;
>> + }
>> + } while (action);
>> +
>> + return IRQ_HANDLED;
>> +}
>> +
>> +static void pv_ipi_init(void)
>> +{
>> + int r, swi0;
>> +
>> + swi0 = get_percpu_irq(INT_SWI0);
>> + if (swi0 < 0)
>> + panic("SWI0 IRQ mapping failed\n");
>> + irq_set_percpu_devid(swi0);
>> + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
>> + if (r < 0)
>> + panic("SWI0 IRQ request failed\n");
>> +}
>> +#endif
>> +
>> static bool kvm_para_available(void)
>> {
>> static int hypervisor_type;
>> @@ -32,10 +131,24 @@ static bool kvm_para_available(void)
>>
>> int __init pv_guest_init(void)
>> {
>> + int feature;
>> +
>> if (!cpu_has_hypervisor)
>> return 0;
>> if (!kvm_para_available())
>> return 0;
>>
>> + /*
>> + * check whether KVM hypervisor supports pv_ipi or not
>> + */
>> +#ifdef CONFIG_SMP
>> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
>> + if (feature & KVM_FEATURE_PV_IPI) {
>> + smp_ops.call_func_single_ipi = pv_send_ipi_single;
>> + smp_ops.call_func_ipi = pv_send_ipi_mask;
> From this patch I found that these functions are supposed to send any
> type of IPI, yet the naming is call_func_xxx. Maybe send_ipi_single
> and send_ipi_mask are more accurate.
yes, you are right. Will modify in the next patch.
Regards
Bibo Mao
>
> Huacai
>
>> + smp_ops.ipi_init = pv_ipi_init;
>> + }
>> +#endif
>> +
>> return 1;
>> }
>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>> index 46735ba49815..57b5706cedb9 100644
>> --- a/arch/loongarch/kernel/smp.c
>> +++ b/arch/loongarch/kernel/smp.c
>> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>> void loongson_init_secondary(void)
>> {
>> unsigned int cpu = smp_processor_id();
>> - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>>
>> change_csr_ecfg(ECFG0_IM, imask);
>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>> index f4e4df05f578..189b70bad825 100644
>> --- a/arch/loongarch/kvm/exit.c
>> +++ b/arch/loongarch/kvm/exit.c
>> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>> case CPUCFG_KVM_SIG:
>> vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>> break;
>> + case CPUCFG_KVM_FEATURE:
>> + vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
>> + break;
>> default:
>> vcpu->arch.gprs[rd] = 0;
>> break;
>> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
>> return RESUME_GUEST;
>> }
>>
>> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
>> +{
>> + unsigned long ipi_bitmap;
>> + unsigned int min, cpu, i;
>> + struct kvm_vcpu *dest;
>> +
>> + min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
>> + for (i = 0; i < 2; i++) {
>> + ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
>> + if (!ipi_bitmap)
>> + continue;
>> +
>> + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
>> + while (cpu < BITS_PER_LONG) {
>> + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
>> + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
>> + cpu + 1);
>> + if (!dest)
>> + continue;
>> +
>> + /*
>> + * Send SWI0 to dest vcpu to emulate IPI interrupt
>> + */
>> + kvm_queue_irq(dest, INT_SWI0);
>> + kvm_vcpu_kick(dest);
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * hypercall emulation always returns to guest; caller should check retval.
>> + */
>> +static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
>> +{
>> + unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
>> + long ret;
>> +
>> + switch (func) {
>> + case KVM_HC_FUNC_IPI:
>> + kvm_pv_send_ipi(vcpu);
>> + ret = KVM_HC_STATUS_SUCCESS;
>> + break;
>> + default:
>> + ret = KVM_HC_INVALID_CODE;
>> + break;
>> + };
>> +
>> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
>> +}
>> +
>> static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
>> {
>> + larch_inst inst;
>> + unsigned int code;
>> +
>> + inst.word = vcpu->arch.badi;
>> + code = inst.reg0i15_format.immediate;
>> update_pc(&vcpu->arch);
>>
>> - /* Treat it as noop intruction, only set return value */
>> - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
>> + switch (code) {
>> + case KVM_HC_SERVICE:
>> + vcpu->stat.hvcl_exits++;
>> + kvm_handle_pv_hcall(vcpu);
>> + break;
>> + default:
>> + /* Treat it as a noop instruction, only set return value */
>> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
>> + break;
>> + }
>> +
>> return RESUME_GUEST;
>> }
>>
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 97ca9c7160e6..80e05ba9b48d 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
>> STATS_DESC_COUNTER(VCPU, idle_exits),
>> STATS_DESC_COUNTER(VCPU, cpucfg_exits),
>> STATS_DESC_COUNTER(VCPU, signal_exits),
>> + STATS_DESC_COUNTER(VCPU, hvcl_exits)
>> };
>>
>> const struct kvm_stats_header kvm_vcpu_stats_header = {
>> --
>> 2.39.3
>>
>>
On 2024/1/29 下午9:11, Huacai Chen wrote:
> Hi, Bibo,
>
> Without this patch I can also create a SMP VM, so what problem does
> this patch want to solve?
With the ipi irqchip, physical cpuid is used for the dest cpu rather
than logical cpuid. And if the ipi device is emulated on the qemu side,
there is find_cpu_by_archid to get the dest vcpu in file
hw/intc/loongarch_ipi.c. Here, with the hypercall method, ipi is
emulated on the kvm kernel side, so there should be the same physical
cpuid searching logic. And function kvm_get_vcpu_by_cpuid is used by
the pv_ipi backend.
Regards
Bibo Mao
>
> Huacai
>
> On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>>
>> Physical cpuid is used to irq routing for irqchips such as ipi/msi/
>> extioi interrupt controller. And physical cpuid is stored at CSR
>> register LOONGARCH_CSR_CPUID, it can not be changed once vcpu is
>> created. Since different irqchips have different size definition
>> about physical cpuid, KVM uses the smallest cpuid from extioi, and
>> the max cpuid size is defines as 256.
>>
>> Signed-off-by: Bibo Mao <[email protected]>
>> ---
>> arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
>> arch/loongarch/include/asm/kvm_vcpu.h | 1 +
>> arch/loongarch/kvm/vcpu.c | 93 ++++++++++++++++++++++++++-
>> arch/loongarch/kvm/vm.c | 11 ++++
>> 4 files changed, 130 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index 2d62f7b0d377..57399d7cf8b7 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>>
>> #define MAX_PGTABLE_LEVELS 4
>>
>> +/*
>> + * Physical cpu id is used for interrupt routing, there are different
>> + * definitions about physical cpuid on different hardwares.
>> + * For LOONGARCH_CSR_CPUID register, max cpuid size is 512
>> + * For IPI HW, max dest CPUID size is 1024
>> + * For extioi interrupt controller, max dest CPUID size is 256
>> + * For MSI interrupt controller, max supported CPUID size is 65536
>> + *
>> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
>> + * it will be expanded to 4096, including 16 packages at most. And every
>> + * package supports at most 256 vcpus
>> + */
>> +#define KVM_MAX_PHYID 256
>> +
>> +struct kvm_phyid_info {
>> + struct kvm_vcpu *vcpu;
>> + bool enabled;
>> +};
>> +
>> +struct kvm_phyid_map {
>> + int max_phyid;
>> + struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
>> +};
>> +
>> struct kvm_arch {
>> /* Guest physical mm */
>> kvm_pte_t *pgd;
>> @@ -71,6 +95,8 @@ struct kvm_arch {
>> unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
>> unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
>> unsigned int root_level;
>> + struct mutex phyid_map_lock;
>> + struct kvm_phyid_map *phyid_map;
>>
>> s64 time_offset;
>> struct kvm_context __percpu *vmcs;
>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
>> index e71ceb88f29e..2402129ee955 100644
>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
>> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
>> void kvm_restore_timer(struct kvm_vcpu *vcpu);
>>
>> int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>>
>> /*
>> * Loongarch KVM guest interrupt handling
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 27701991886d..97ca9c7160e6 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
>> return 0;
>> }
>>
>> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
>> +{
>> + int cpuid;
>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>> + struct kvm_phyid_map *map;
>> +
>> + if (val >= KVM_MAX_PHYID)
>> + return -EINVAL;
>> +
>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>> + map = vcpu->kvm->arch.phyid_map;
>> + mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
>> + if (map->phys_map[cpuid].enabled) {
>> + /*
>> + * Cpuid was already set before
>> + * Forbid changing to a different cpuid at runtime
>> + */
>> + if (cpuid != val) {
>> + /*
>> + * Cpuid 0 is the initial value for a vcpu; it may be
>> + * an invalid, unset value for the vcpu
>> + */
>> + if (cpuid) {
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return -EINVAL;
>> + }
>> + } else {
>> + /* Discard duplicated cpuid set */
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return 0;
>> + }
>> + }
>> +
>> + if (map->phys_map[val].enabled) {
>> + /*
>> + * New cpuid is already set with other vcpu
>> + * Forbid sharing the same cpuid between different vcpus
>> + */
>> + if (map->phys_map[val].vcpu != vcpu) {
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return -EINVAL;
>> + }
>> +
>> + /* Discard duplicated cpuid set operation */
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return 0;
>> + }
>> +
>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
>> + map->phys_map[val].enabled = true;
>> + map->phys_map[val].vcpu = vcpu;
>> + if (map->max_phyid < val)
>> + map->max_phyid = val;
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return 0;
>> +}
>> +
>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
>> +{
>> + struct kvm_phyid_map *map;
>> +
>> + if (cpuid >= KVM_MAX_PHYID)
>> + return NULL;
>> +
>> + map = kvm->arch.phyid_map;
>> + if (map->phys_map[cpuid].enabled)
>> + return map->phys_map[cpuid].vcpu;
>> +
>> + return NULL;
>> +}
>> +
>> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
>> +{
>> + int cpuid;
>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>> + struct kvm_phyid_map *map;
>> +
>> + map = vcpu->kvm->arch.phyid_map;
>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>> + if (cpuid >= KVM_MAX_PHYID)
>> + return;
>> +
>> + if (map->phys_map[cpuid].enabled) {
>> + map->phys_map[cpuid].vcpu = NULL;
>> + map->phys_map[cpuid].enabled = false;
>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
>> + }
>> +}
>> +
>> static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>> {
>> int ret = 0, gintc;
>> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>> kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>>
>> return ret;
>> - }
>> + } else if (id == LOONGARCH_CSR_CPUID)
>> + return kvm_set_cpuid(vcpu, val);
>>
>> kvm_write_sw_gcsr(csr, id, val);
>>
>> @@ -925,6 +1015,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>> hrtimer_cancel(&vcpu->arch.swtimer);
>> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>> kfree(vcpu->arch.csr);
>> + kvm_drop_cpuid(vcpu);
>>
>> /*
>> * If the vCPU is freed and reused as another vCPU, we don't want the
>> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
>> index 0a37f6fa8f2d..6fd5916ebef3 100644
>> --- a/arch/loongarch/kvm/vm.c
>> +++ b/arch/loongarch/kvm/vm.c
>> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>> if (!kvm->arch.pgd)
>> return -ENOMEM;
>>
>> + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
>> + GFP_KERNEL_ACCOUNT);
>> + if (!kvm->arch.phyid_map) {
>> + free_page((unsigned long)kvm->arch.pgd);
>> + kvm->arch.pgd = NULL;
>> + return -ENOMEM;
>> + }
>> +
>> kvm_init_vmcs(kvm);
>> kvm->arch.gpa_size = BIT(cpu_vabits - 1);
>> kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
>> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>> for (i = 0; i <= kvm->arch.root_level; i++)
>> kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>>
>> + mutex_init(&kvm->arch.phyid_map_lock);
>> return 0;
>> }
>>
>> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>> {
>> kvm_destroy_vcpus(kvm);
>> free_page((unsigned long)kvm->arch.pgd);
>> + kvfree(kvm->arch.phyid_map);
>> kvm->arch.pgd = NULL;
>> + kvm->arch.phyid_map = NULL;
>> }
>>
>> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> --
>> 2.39.3
>>