This patchset adds pv ipi support for VM. On physical machine, ipi HW
uses IOCSR registers, however there is trap into hypervisor when vcpu
accesses IOCSR registers if system is in VM mode. SWI is a interrupt
mechanism like SGI on ARM, software can send interrupt to CPU, only that
on LoongArch SWI can only be sent to local CPU now. So SWI can not used
for IPI on real HW system, however it can be used on VM when combined with
hypercall method. This patch uses SWI interrupt for IPI mechanism, SWI
injection uses hypercall method. And there is one trap with IPI sending,
however with SWI interrupt handler there is no trap.
This patch passes to runltp testcases, and unixbench score is 99% of
that on physical machine on 3C5000 single way machine. Here is unixbench
score with 16 cores on 3C5000 single way machine.
----------------UnixBench score on 3C5000 machine with 16 cores --------
Dhrystone 2 using register variables 116700.0 339749961.8 29113.1
Double-Precision Whetstone 55.0 57716.9 10494.0
Execl Throughput 43.0 33563.4 7805.4
File Copy 1024 bufsize 2000 maxblocks 3960.0 1017912.5 2570.5
File Copy 256 bufsize 500 maxblocks 1655.0 260061.4 1571.4
File Copy 4096 bufsize 8000 maxblocks 5800.0 3216109.4 5545.0
Pipe Throughput 12440.0 18404312.0 14794.5
Pipe-based Context Switching 4000.0 3395856.2 8489.6
Process Creation 126.0 55684.8 4419.4
Shell Scripts (1 concurrent) 42.4 55901.8 13184.4
Shell Scripts (8 concurrent) 6.0 7396.5 12327.5
System Call Overhead 15000.0 6997351.4 4664.9
System Benchmarks Index Score 7288.6
----------------UnixBench score on VM with 16 cores -----------------
Dhrystone 2 using register variables 116700.0 341649555.5 29275.9
Double-Precision Whetstone 55.0 57490.9 10452.9
Execl Throughput 43.0 33663.8 7828.8
File Copy 1024 bufsize 2000 maxblocks 3960.0 1047631.2 2645.5
File Copy 256 bufsize 500 maxblocks 1655.0 286671.0 1732.2
File Copy 4096 bufsize 8000 maxblocks 5800.0 3243588.2 5592.4
Pipe Throughput 12440.0 16353087.8 13145.6
Pipe-based Context Switching 4000.0 3100690.0 7751.7
Process Creation 126.0 51502.1 4087.5
Shell Scripts (1 concurrent) 42.4 56665.3 13364.4
Shell Scripts (8 concurrent) 6.0 7412.1 12353.4
System Call Overhead 15000.0 6962239.6 4641.5
System Benchmarks Index Score 7205.8
---
Change in V2:
1. Add hw cpuid map support since ipi routing uses hw cpuid
2. Refine changelog description
3. Add hypercall statistic support for vcpu
4. Set percpu pv ipi message buffer aligned with cacheline
5. Refine pv ipi send logic, do not send ipi message with if there is
pending ipi message.
---
Bibo Mao (6):
LoongArch: KVM: Add hypercall instruction emulation support
LoongArch: KVM: Add cpucfg area for kvm hypervisor
LoongArch/smp: Refine ipi ops on LoongArch platform
LoongArch: Add paravirt interface for guest kernel
LoongArch: KVM: Add physical cpuid map support
LoongArch: Add pv ipi support on LoongArch system
arch/loongarch/Kconfig | 9 +
arch/loongarch/include/asm/Kbuild | 1 -
arch/loongarch/include/asm/hardirq.h | 5 +
arch/loongarch/include/asm/inst.h | 1 +
arch/loongarch/include/asm/irq.h | 10 +-
arch/loongarch/include/asm/kvm_host.h | 27 +++
arch/loongarch/include/asm/kvm_para.h | 157 ++++++++++++++++++
arch/loongarch/include/asm/kvm_vcpu.h | 1 +
arch/loongarch/include/asm/loongarch.h | 10 ++
arch/loongarch/include/asm/paravirt.h | 27 +++
.../include/asm/paravirt_api_clock.h | 1 +
arch/loongarch/include/asm/smp.h | 31 ++--
arch/loongarch/include/uapi/asm/Kbuild | 2 -
arch/loongarch/kernel/Makefile | 1 +
arch/loongarch/kernel/irq.c | 24 +--
arch/loongarch/kernel/paravirt.c | 151 +++++++++++++++++
arch/loongarch/kernel/perf_event.c | 14 +-
arch/loongarch/kernel/setup.c | 2 +
arch/loongarch/kernel/smp.c | 60 ++++---
arch/loongarch/kernel/time.c | 12 +-
arch/loongarch/kvm/exit.c | 122 ++++++++++++--
arch/loongarch/kvm/vcpu.c | 62 ++++++-
arch/loongarch/kvm/vm.c | 11 ++
23 files changed, 639 insertions(+), 102 deletions(-)
create mode 100644 arch/loongarch/include/asm/kvm_para.h
create mode 100644 arch/loongarch/include/asm/paravirt.h
create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
delete mode 100644 arch/loongarch/include/uapi/asm/Kbuild
create mode 100644 arch/loongarch/kernel/paravirt.c
base-commit: 52b1853b080a082ec3749c3a9577f6c71b1d4a90
--
2.39.3
System will trap into hypervisor when executing cpucfg instruction.
And now hardware only uses the area 0 - 20 for actual usage, here
one specified area 0x10000000 -- 0x100000ff is used for KVM hypervisor,
and the area can be extended for other hypervisors in future.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/inst.h | 1 +
arch/loongarch/include/asm/loongarch.h | 9 +++++
arch/loongarch/kvm/exit.c | 46 +++++++++++++++++---------
3 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..ad120f924905 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -67,6 +67,7 @@ enum reg2_op {
revhd_op = 0x11,
extwh_op = 0x16,
extwb_op = 0x17,
+ cpucfg_op = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..a03b466555a1 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,15 @@
#define CPUCFG48_VFPU_CG BIT(2)
#define CPUCFG48_RAM_CG BIT(3)
+/*
+ * cpucfg index area: 0x10000000 -- 0x100000ff
+ * SW emulation for KVM hypervirsor
+ */
+#define CPUCFG_KVM_BASE 0x10000000UL
+#define CPUCFG_KVM_SIZE 0x100
+#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
+#define KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
#ifndef __ASSEMBLY__
/* CSR */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 59e5fe221982..e233d7b3b76d 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -206,10 +206,37 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
-static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
{
int rd, rj;
unsigned int index;
+
+ rd = inst.reg2_format.rd;
+ rj = inst.reg2_format.rj;
+ ++vcpu->stat.cpucfg_exits;
+ index = vcpu->arch.gprs[rj];
+
+ /*
+ * By LoongArch Reference Manual 2.2.10.5
+ * Return value is 0 for undefined cpucfg index
+ */
+ switch (index) {
+ case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+ vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+ break;
+ case CPUCFG_KVM_SIG:
+ vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+ break;
+ default:
+ vcpu->arch.gprs[rd] = 0;
+ break;
+ }
+
+ return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +251,8 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
- if (inst.reg2_format.opcode == 0x1B) {
- rd = inst.reg2_format.rd;
- rj = inst.reg2_format.rj;
- ++vcpu->stat.cpucfg_exits;
- index = vcpu->arch.gprs[rj];
- er = EMULATE_DONE;
- /*
- * By LoongArch Reference Manual 2.2.10.5
- * return value is 0 for undefined cpucfg index
- */
- if (index < KVM_MAX_CPUCFG_REGS)
- vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
- else
- vcpu->arch.gprs[rd] = 0;
- }
+ if (inst.reg2_format.opcode == cpucfg_op)
+ er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
--
2.39.3
The patch add paravirt interface for guest kernel, function
pv_guest_init firstly checks whether system runs on VM mode. If kernel
runs on VM mode, it will call function kvm_para_available to detect
whether current VMM is KVM hypervisor. And the paravirt function can work
only if current VMM is KVM hypervisor, and there is only KVM hypervisor
supported on LoongArch now.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/Kconfig | 9 ++++
arch/loongarch/include/asm/kvm_para.h | 7 ++++
arch/loongarch/include/asm/paravirt.h | 27 ++++++++++++
.../include/asm/paravirt_api_clock.h | 1 +
arch/loongarch/kernel/Makefile | 1 +
arch/loongarch/kernel/paravirt.c | 41 +++++++++++++++++++
arch/loongarch/kernel/setup.c | 2 +
7 files changed, 88 insertions(+)
create mode 100644 arch/loongarch/include/asm/paravirt.h
create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h
create mode 100644 arch/loongarch/kernel/paravirt.c
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..d8ccaf46a50d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -564,6 +564,15 @@ config CPU_HAS_PREFETCH
bool
default y
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ depends on AS_HAS_LVZ_EXTENSION
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization. However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
config ARCH_SUPPORTS_KEXEC
def_bool y
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 9425d3b7e486..41200e922a82 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,13 @@
#ifndef _ASM_LOONGARCH_KVM_PARA_H
#define _ASM_LOONGARCH_KVM_PARA_H
+/*
+ * Hypcall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT 8
+#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+
/*
* LoongArch hypcall return code
*/
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index 000000000000..b64813592ba0
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return static_call(pv_steal_clock)(cpu);
+}
+
+int pv_guest_init(void);
+#else
+static inline int pv_guest_init(void)
+{
+ return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..662e6e9de12d 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index 000000000000..21d01d05791a
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kvm_para.h>
+#include <asm/paravirt.h>
+#include <linux/static_call.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool kvm_para_available(void)
+{
+ static int hypervisor_type;
+ int config;
+
+ if (!hypervisor_type) {
+ config = read_cpucfg(CPUCFG_KVM_SIG);
+ if (!memcmp(&config, KVM_SIGNATURE, 4))
+ hypervisor_type = HYPERVISOR_KVM;
+ }
+
+ return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_guest_init(void)
+{
+ if (!cpu_has_hypervisor)
+ return 0;
+ if (!kvm_para_available())
+ return 0;
+
+ return 1;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index d183a745fb85..fa680bdd0bd1 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -43,6 +43,7 @@
#include <asm/efi.h>
#include <asm/loongson.h>
#include <asm/numa.h>
+#include <asm/paravirt.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -376,6 +377,7 @@ void __init platform_init(void)
pr_info("The BIOS Version: %s\n", b_info.bios_version);
efi_runtime_init();
+ pv_guest_init();
}
static void __init check_kernel_sections_mem(void)
--
2.39.3
This patch refines ipi handling on LoongArch platform, there are
three changes with this patch.
1. Add generic get_percpu_irq api, replace some percpu irq function
such as get_ipi_irq/get_pmc_irq/get_timer_irq with get_percpu_irq.
2. Change parameter action definition with function
loongson_send_ipi_single and loongson_send_ipi_mask. Code encoding is used
here rather than bitmap encoding for ipi action, ipi hw sender uses action
code, and ipi receiver will get action bitmap encoding, the ipi hw will
convert it into bitmap in ipi message buffer.
3. Add smp_ops on LoongArch platform so that pv ipi can be used later.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/hardirq.h | 4 ++
arch/loongarch/include/asm/irq.h | 10 ++++-
arch/loongarch/include/asm/smp.h | 31 +++++++--------
arch/loongarch/kernel/irq.c | 22 +----------
arch/loongarch/kernel/perf_event.c | 14 +------
arch/loongarch/kernel/smp.c | 58 +++++++++++++++++++---------
arch/loongarch/kernel/time.c | 12 +-----
7 files changed, 71 insertions(+), 80 deletions(-)
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..9f0038e19c7f 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -12,6 +12,10 @@
extern void ack_bad_irq(unsigned int irq);
#define ack_bad_irq ack_bad_irq
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNCTION,
+};
#define NR_IPI 2
typedef struct {
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..00101b6d601e 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,8 +117,16 @@ extern struct fwnode_handle *liointc_handle;
extern struct fwnode_handle *pch_lpc_handle;
extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+ struct irq_domain *d;
+
+ d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+ if (d)
+ return irq_create_mapping(d, vector);
+ return -EINVAL;
+}
#include <asm-generic/irq.h>
#endif /* _ASM_IRQ_H */
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..330f1cb3741c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
+struct smp_ops {
+ void (*call_func_ipi)(const struct cpumask *mask, unsigned int action);
+ void (*call_func_single_ipi)(int cpu, unsigned int action);
+ void (*ipi_init)(void);
+};
+
+extern struct smp_ops smp_ops;
extern int smp_num_siblings;
extern int num_processors;
extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
void loongson_boot_secondary(int cpu, struct task_struct *idle);
void loongson_init_secondary(void);
void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
#ifdef CONFIG_HOTPLUG_CPU
int loongson_cpu_disable(void);
void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
#define cpu_physical_id(cpu) cpu_logical_map(cpu)
-#define SMP_BOOT_CPU 0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION 0x4
+#define ACTTION_BOOT_CPU 0
+#define ACTTION_RESCHEDULE 1
+#define ACTTION_CALL_FUNCTION 2
+#define SMP_BOOT_CPU BIT(ACTTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION BIT(ACTTION_CALL_FUNCTION)
struct secondary_data {
unsigned long stack;
@@ -71,7 +79,8 @@ extern struct secondary_data cpuboot_data;
extern asmlinkage void smpboot_entry(void);
extern asmlinkage void start_secondary(void);
-
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void calculate_cpu_foreign_map(void);
/*
@@ -79,16 +88,6 @@ extern void calculate_cpu_foreign_map(void);
*/
extern void show_ipi_list(struct seq_file *p, int prec);
-static inline void arch_send_call_function_single_ipi(int cpu)
-{
- loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
-}
-
-static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
-{
- loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
-}
-
#ifdef CONFIG_HOTPLUG_CPU
static inline int __cpu_disable(void)
{
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..1b58f7c3eed9 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
}
-static int __init get_ipi_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_IPI);
-
- return -EINVAL;
-}
-
void __init init_IRQ(void)
{
int i;
-#ifdef CONFIG_SMP
- int r, ipi_irq;
- static int ipi_dummy_dev;
-#endif
unsigned int order = get_order(IRQ_STACK_SIZE);
struct page *page;
@@ -113,13 +99,7 @@ void __init init_IRQ(void)
init_vec_parent_group();
irqchip_init();
#ifdef CONFIG_SMP
- ipi_irq = get_ipi_irq();
- if (ipi_irq < 0)
- panic("IPI IRQ mapping failed\n");
- irq_set_percpu_devid(ipi_irq);
- r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev);
- if (r < 0)
- panic("IPI IRQ request failed\n");
+ smp_ops.ipi_init();
#endif
for (i = 0; i < NR_IRQS; i++)
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 0491bf453cd4..3265c8f33223 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu)
static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);
-static int get_pmc_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_PCOV);
-
- return -EINVAL;
-}
-
static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);
@@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
{
if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
on_each_cpu(reset_counters, NULL, 1);
- free_irq(get_pmc_irq(), &loongarch_pmu);
+ free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
mutex_unlock(&pmu_reserve_mutex);
}
}
@@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
if (event->cpu >= 0 && !cpu_online(event->cpu))
return -ENODEV;
- irq = get_pmc_irq();
+ irq = get_percpu_irq(INT_PCOV);
flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
if (!atomic_inc_not_zero(&active_events)) {
mutex_lock(&pmu_reserve_mutex);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 5bca12d16e06..f4eb6f6948d4 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -66,11 +66,6 @@ static cpumask_t cpu_core_setup_map;
struct secondary_data cpuboot_data;
static DEFINE_PER_CPU(int, cpu_state);
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNCTION,
-};
-
static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNCTION] = "Function call interrupts",
@@ -123,24 +118,19 @@ static u32 ipi_read_clear(int cpu)
static void ipi_write_action(int cpu, u32 action)
{
- unsigned int irq = 0;
-
- while ((irq = ffs(action))) {
- uint32_t val = IOCSR_IPI_SEND_BLOCKING;
+ uint32_t val;
- val |= (irq - 1);
- val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
- iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
- action &= ~BIT(irq - 1);
- }
+ val = IOCSR_IPI_SEND_BLOCKING | action;
+ val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
+ iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
}
-void loongson_send_ipi_single(int cpu, unsigned int action)
+static void loongson_send_ipi_single(int cpu, unsigned int action)
{
ipi_write_action(cpu_logical_map(cpu), (u32)action);
}
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
@@ -148,6 +138,16 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
ipi_write_action(cpu_logical_map(i), (u32)action);
}
+void arch_send_call_function_single_ipi(int cpu)
+{
+ smp_ops.call_func_single_ipi(cpu, ACTTION_CALL_FUNCTION);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ smp_ops.call_func_ipi(mask, ACTTION_CALL_FUNCTION);
+}
+
/*
* This function sends a 'reschedule' IPI to another CPU.
* it goes straight through and wastes no time serializing
@@ -155,11 +155,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
*/
void arch_smp_send_reschedule(int cpu)
{
- loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
+ smp_ops.call_func_single_ipi(cpu, ACTTION_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
-irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
+static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
{
unsigned int action;
unsigned int cpu = smp_processor_id();
@@ -179,6 +179,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
+static void loongson_ipi_init(void)
+{
+ int r, ipi_irq;
+
+ ipi_irq = get_percpu_irq(INT_IPI);
+ if (ipi_irq < 0)
+ panic("IPI IRQ mapping failed\n");
+
+ irq_set_percpu_devid(ipi_irq);
+ r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat);
+ if (r < 0)
+ panic("IPI IRQ request failed\n");
+}
+
+struct smp_ops smp_ops = {
+ .call_func_single_ipi = loongson_send_ipi_single,
+ .call_func_ipi = loongson_send_ipi_mask,
+ .ipi_init = loongson_ipi_init,
+};
+
static void __init fdt_smp_setup(void)
{
#ifdef CONFIG_OF
@@ -253,7 +273,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
csr_mail_send(entry, cpu_logical_map(cpu), 0);
- loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
+ loongson_send_ipi_single(cpu, ACTTION_BOOT_CPU);
}
/*
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index e7015f7b70e3..fd5354f9be7c 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -123,16 +123,6 @@ void sync_counter(void)
csr_write64(init_offset, LOONGARCH_CSR_CNTC);
}
-static int get_timer_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_TI);
-
- return -EINVAL;
-}
-
int constant_clockevent_init(void)
{
unsigned int cpu = smp_processor_id();
@@ -142,7 +132,7 @@ int constant_clockevent_init(void)
static int irq = 0, timer_irq_installed = 0;
if (!timer_irq_installed) {
- irq = get_timer_irq();
+ irq = get_percpu_irq(INT_TI);
if (irq < 0)
pr_err("Failed to map irq %d (timer)\n", irq);
}
--
2.39.3
On LoongArch system, ipi hw uses iocsr registers, there is one iocsr
register access on ipi sender and two iocsr access on ipi receiver
which is ipi interrupt handler. On VM mode all iocsr registers
accessing will trap into hypervisor. So with one ipi hw notification
there will be three times of trap.
This patch adds pv ipi support for VM, hypercall instruction is used
to ipi sender, and hypervisor will inject SWI on the VM. During SWI
interrupt handler, only estat CSR register is written to clear irq.
Estat CSR register access will not trap into hypervisor. So with pv ipi
supported, pv ipi sender will trap into hypervsor one time, pv ipi
revicer will not trap, there is only one time of trap.
Also this patch adds ipi multicast support, the method is similar with
x86. With ipi multicast support, ipi notification can be sent to at most
64 vcpus at one time. It reduces trap into hypervisor greatly.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/hardirq.h | 1 +
arch/loongarch/include/asm/kvm_host.h | 1 +
arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++
arch/loongarch/include/asm/loongarch.h | 1 +
arch/loongarch/kernel/irq.c | 2 +-
arch/loongarch/kernel/paravirt.c | 110 ++++++++++++++++++++++
arch/loongarch/kernel/smp.c | 2 +-
arch/loongarch/kvm/exit.c | 70 +++++++++++++-
arch/loongarch/kvm/vcpu.c | 1 +
9 files changed, 308 insertions(+), 4 deletions(-)
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..8a611843c1f0 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+ atomic_t messages ____cacheline_aligned_in_smp;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 93acba84f87e..f21c60ce58be 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+ u64 hvcl_exits;
};
#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 41200e922a82..a25a84e372b9 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -9,6 +9,10 @@
#define HYPERVISOR_VENDOR_SHIFT 8
#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HC_CODE_SERVICE 0
+#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
+#define KVM_HC_FUNC_IPI 1
+
/*
* LoongArch hypcall return code
*/
@@ -16,6 +20,126 @@
#define KVM_HC_INVALID_CODE -1UL
#define KVM_HC_INVALID_PARAMETER -2UL
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+ unsigned long arg0, unsigned long arg1)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall4(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall5(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+ register unsigned long a5 asm("a5") = arg4;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+ : "memory"
+ );
+
+ return ret;
+}
+
+
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index a03b466555a1..a787b69f6fb0 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -167,6 +167,7 @@
#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
#define KVM_SIGNATURE "KVM\0"
#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+#define KVM_FEATURE_PV_IPI BIT(1)
#ifndef __ASSEMBLY__
/* CSR */
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 1b58f7c3eed9..b5bd298c981f 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
}
- set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+ set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
}
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 21d01d05791a..b7264bc60524 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/types.h>
+#include <linux/interrupt.h>
#include <linux/jump_label.h>
#include <linux/kvm_para.h>
#include <asm/paravirt.h>
@@ -16,6 +17,101 @@ static u64 native_steal_clock(int cpu)
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+#ifdef CONFIG_SMP
+static void pv_send_ipi_single(int cpu, unsigned int action)
+{
+ unsigned int min, old;
+ unsigned long bitmap = 0;
+ irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+
+ action = BIT(action);
+ old = atomic_fetch_or(action, &info->messages);
+ if (old == 0) {
+ min = cpu_logical_map(cpu);
+ bitmap = 1;
+ kvm_hypercall2(KVM_HC_FUNC_IPI, bitmap, min);
+ }
+}
+
+static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ unsigned int cpu, i, min = 0, max = 0, old;
+ u64 bitmap = 0;
+ irq_cpustat_t *info;
+
+ if (cpumask_empty(mask))
+ return;
+
+ action = BIT(action);
+ for_each_cpu(i, mask) {
+ info = &per_cpu(irq_stat, i);
+ old = atomic_fetch_or(action, &info->messages);
+ if (old)
+ continue;
+
+ cpu = cpu_logical_map(i);
+ if (!bitmap) {
+ min = max = cpu;
+ } else if (cpu > min && cpu < min + BITS_PER_LONG) {
+ max = cpu > max ? cpu : max;
+ } else if (cpu < min && (max - cpu) < BITS_PER_LONG) {
+ bitmap <<= min - cpu;
+ min = cpu;
+ } else {
+ /*
+ * Physical cpuid is sorted in ascending order ascend
+ * for the next mask calculation, send IPI here
+ * directly and skip the remainding cpus
+ */
+ kvm_hypercall2(KVM_HC_FUNC_IPI, bitmap, min);
+ min = max = cpu;
+ bitmap = 0;
+ }
+ __set_bit(cpu - min, (unsigned long *)&bitmap);
+ }
+
+ if (bitmap)
+ kvm_hypercall2(KVM_HC_FUNC_IPI, bitmap, min);
+}
+
+static irqreturn_t loongson_do_swi(int irq, void *dev)
+{
+ irq_cpustat_t *info;
+ long action;
+
+ clear_csr_estat(1 << INT_SWI0);
+
+ info = this_cpu_ptr(&irq_stat);
+ do {
+ action = atomic_xchg(&info->messages, 0);
+ if (action & SMP_CALL_FUNCTION) {
+ generic_smp_call_function_interrupt();
+ info->ipi_irqs[IPI_CALL_FUNCTION]++;
+ }
+
+ if (action & SMP_RESCHEDULE) {
+ scheduler_ipi();
+ info->ipi_irqs[IPI_RESCHEDULE]++;
+ }
+ } while (action);
+
+ return IRQ_HANDLED;
+}
+
+static void pv_ipi_init(void)
+{
+ int r, swi0;
+
+ swi0 = get_percpu_irq(INT_SWI0);
+ if (swi0 < 0)
+ panic("SWI0 IRQ mapping failed\n");
+ irq_set_percpu_devid(swi0);
+ r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
+ if (r < 0)
+ panic("SWI0 IRQ request failed\n");
+}
+#endif
+
static bool kvm_para_available(void)
{
static int hypervisor_type;
@@ -32,10 +128,24 @@ static bool kvm_para_available(void)
int __init pv_guest_init(void)
{
+ int feature;
+
if (!cpu_has_hypervisor)
return 0;
if (!kvm_para_available())
return 0;
+ /*
+ * check whether KVM hypervisor supports pv_ipi or not
+ */
+#ifdef CONFIG_SMP
+ feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+ if (feature & KVM_FEATURE_PV_IPI) {
+ smp_ops.call_func_single_ipi = pv_send_ipi_single;
+ smp_ops.call_func_ipi = pv_send_ipi_mask;
+ smp_ops.ipi_init = pv_ipi_init;
+ }
+#endif
+
return 1;
}
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index f4eb6f6948d4..677430fb16f5 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -282,7 +282,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
void loongson_init_secondary(void)
{
unsigned int cpu = smp_processor_id();
- unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
+ unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
change_csr_ecfg(ECFG0_IM, imask);
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index e233d7b3b76d..51155ae2912f 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
case CPUCFG_KVM_SIG:
vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
break;
+ case CPUCFG_KVM_FEATURE:
+ vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
+ break;
default:
vcpu->arch.gprs[rd] = 0;
break;
@@ -664,12 +667,75 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+ u64 ipi_bitmap;
+ unsigned int min, cpu;
+ struct kvm_vcpu *dest;
+
+ ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1];
+ min = vcpu->arch.gprs[LOONGARCH_GPR_A2];
+
+ if (ipi_bitmap) {
+ cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+ while (cpu < BITS_PER_LONG) {
+ dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+ if (dest) {
+ /*
+ * Send SWI0 to dest vcpu to emulate IPI interrupt
+ */
+ kvm_queue_irq(dest, INT_SWI0);
+ kvm_vcpu_kick(dest);
+ }
+ cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * hypcall emulation always return to guest, Caller should check retval.
+ */
+static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
+ long ret;
+
+ switch (func) {
+ case KVM_HC_FUNC_IPI:
+ kvm_pv_send_ipi(vcpu);
+ ret = KVM_HC_STATUS_SUCCESS;
+ break;
+ default:
+ ret = KVM_HC_INVALID_CODE;
+ break;
+ };
+
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
+}
+
static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
{
+ larch_inst inst;
+ unsigned int code;
+
+ inst.word = vcpu->arch.badi;
+ code = inst.reg0i15_format.immediate;
update_pc(&vcpu->arch);
- /* Treat it as noop intruction, only set return value */
- vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ switch (code) {
+ case KVM_HC_SERVICE:
+ vcpu->stat.hvcl_exits++;
+ kvm_handle_pv_hcall(vcpu);
+ break;
+ default:
+ /* Treat it as noop intruction, only set return value */
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ break;
+ }
+
return RESUME_GUEST;
}
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 9dc40a80ab5a..3cfaf0c74a66 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, idle_exits),
STATS_DESC_COUNTER(VCPU, cpucfg_exits),
STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, hvcl_exits)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
--
2.39.3
Physical cpuid is used to irq routing for irqchips such as ipi/msi/
extioi interrupt controller. And physical cpuid is stored at CSR
register LOONGARCH_CSR_CPUID, it can not be changed once vcpu is
created. Since different irqchips have different size definition
about physical cpuid, KVM uses the smallest cpuid from extioi, and
the max cpuid size is defines as 256.
Signed-off-by: Bibo Mao <[email protected]>
---
arch/loongarch/include/asm/kvm_host.h | 26 ++++++++++++
arch/loongarch/include/asm/kvm_vcpu.h | 1 +
arch/loongarch/kvm/vcpu.c | 61 ++++++++++++++++++++++++++-
arch/loongarch/kvm/vm.c | 11 +++++
4 files changed, 98 insertions(+), 1 deletion(-)
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 0e89db020481..93acba84f87e 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -64,6 +64,30 @@ struct kvm_world_switch {
#define MAX_PGTABLE_LEVELS 4
+/*
+ * Physical cpu id is used for interrupt routing, there are different
+ * definitions about physical cpuid on different hardwares.
+ * For LOONGARCH_CSR_CPUID register, max cpuid size if 512
+ * For IPI HW, max dest CPUID size 1024
+ * For extioi interrupt controller, max dest CPUID size is 256
+ * For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ * Currently max CPUID is defined as 256 for KVM hypervisor, in future
+ * it will be expanded to 4096, including 16 packages at most. And every
+ * package supports at most 256 vcpus
+ */
+#define KVM_MAX_PHYID 256
+
+struct kvm_phyid_info {
+ struct kvm_vcpu *vcpu;
+ bool enabled;
+};
+
+struct kvm_phyid_map {
+ int max_phyid;
+ struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +95,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int root_level;
+ struct mutex phyid_map_lock;
+ struct kvm_phyid_map *phyid_map;
s64 time_offset;
struct kvm_context __percpu *vmcs;
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 0e87652f780a..3019e260a3ae 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -61,6 +61,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
void kvm_restore_timer(struct kvm_vcpu *vcpu);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
/*
* Loongarch KVM guest interrupt handling
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index cf1c4d64c1b7..9dc40a80ab5a 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -274,6 +274,63 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
return 0;
}
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+ int cpuid;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+ struct kvm_phyid_map *map;
+
+ if (val >= KVM_MAX_PHYID)
+ return -EINVAL;
+
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ if (cpuid == 0) {
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+ map = vcpu->kvm->arch.phyid_map;
+ map->phys_map[val].enabled = true;
+ map->phys_map[val].vcpu = vcpu;
+
+ mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
+ if (map->max_phyid < val)
+ map->max_phyid = val;
+ mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ } else if (cpuid != val)
+ return -EINVAL;
+
+ return 0;
+}
+
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
+{
+ struct kvm_phyid_map *map;
+
+ if (cpuid >= KVM_MAX_PHYID)
+ return NULL;
+
+ map = kvm->arch.phyid_map;
+ if (map->phys_map[cpuid].enabled)
+ return map->phys_map[cpuid].vcpu;
+
+ return NULL;
+}
+
+static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
+{
+ int cpuid;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+ struct kvm_phyid_map *map;
+
+ map = vcpu->kvm->arch.phyid_map;
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ if (cpuid >= KVM_MAX_PHYID)
+ return;
+
+ if (map->phys_map[cpuid].enabled) {
+ map->phys_map[cpuid].vcpu = NULL;
+ map->phys_map[cpuid].enabled = false;
+ }
+}
+
static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
{
int ret = 0, gintc;
@@ -291,7 +348,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
return ret;
- }
+ } else if (id == LOONGARCH_CSR_CPUID)
+ return kvm_set_cpuid(vcpu, val);
kvm_write_sw_gcsr(csr, id, val);
@@ -666,6 +724,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
hrtimer_cancel(&vcpu->arch.swtimer);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kfree(vcpu->arch.csr);
+ kvm_drop_cpuid(vcpu);
/*
* If the vCPU is freed and reused as another vCPU, we don't want the
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 0a37f6fa8f2d..6fd5916ebef3 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.pgd)
return -ENOMEM;
+ kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
+ GFP_KERNEL_ACCOUNT);
+ if (!kvm->arch.phyid_map) {
+ free_page((unsigned long)kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ return -ENOMEM;
+ }
+
kvm_init_vmcs(kvm);
kvm->arch.gpa_size = BIT(cpu_vabits - 1);
kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
@@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
for (i = 0; i <= kvm->arch.root_level; i++)
kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
+ mutex_init(&kvm->arch.phyid_map_lock);
return 0;
}
@@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_destroy_vcpus(kvm);
free_page((unsigned long)kvm->arch.pgd);
+ kvfree(kvm->arch.phyid_map);
kvm->arch.pgd = NULL;
+ kvm->arch.phyid_map = NULL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
--
2.39.3