by maobibo

[permalink] [raw]

Subject: Re: [PATCH v3 6/6] LoongArch: Add pv ipi support on LoongArch system

On 2024/1/29 下午9:10, Huacai Chen wrote:
> Hi, Bibo,
>
> On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>>
>> On LoongArch system, ipi hw uses iocsr registers, there is one iocsr
>> register access on ipi sender and two iocsr access on ipi receiver
>> which is ipi interrupt handler. On VM mode all iocsr registers
>> accessing will trap into hypervisor. So with one ipi hw notification
>> there will be three times of trap.
>>
>> This patch adds pv ipi support for VM, hypercall instruction is used
>> to ipi sender, and hypervisor will inject SWI on the VM. During SWI
>> interrupt handler, only estat CSR register is written to clear irq.
>> Estat CSR register access will not trap into hypervisor. So with pv ipi
>> supported, the pv ipi sender will trap into the hypervisor one time and the
>> pv ipi receiver will not trap, so there is only one trap in total.
>>
>> Also this patch adds ipi multicast support, the method is similar to
>> x86. With ipi multicast support, ipi notification can be sent to at most
>> 128 vcpus at one time. It greatly reduces the number of traps into the hypervisor.
>>
>> Signed-off-by: Bibo Mao <[email protected]>
>> ---
>> arch/loongarch/include/asm/hardirq.h | 1 +
>> arch/loongarch/include/asm/kvm_host.h | 1 +
>> arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++
>> arch/loongarch/include/asm/loongarch.h | 1 +
>> arch/loongarch/kernel/irq.c | 2 +-
>> arch/loongarch/kernel/paravirt.c | 113 ++++++++++++++++++++++
>> arch/loongarch/kernel/smp.c | 2 +-
>> arch/loongarch/kvm/exit.c | 73 ++++++++++++++-
>> arch/loongarch/kvm/vcpu.c | 1 +
>> 9 files changed, 314 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
>> index 9f0038e19c7f..8a611843c1f0 100644
>> --- a/arch/loongarch/include/asm/hardirq.h
>> +++ b/arch/loongarch/include/asm/hardirq.h
>> @@ -21,6 +21,7 @@ enum ipi_msg_type {
>> typedef struct {
>> unsigned int ipi_irqs[NR_IPI];
>> unsigned int __softirq_pending;
>> + atomic_t messages ____cacheline_aligned_in_smp;
>> } ____cacheline_aligned irq_cpustat_t;
>>
>> DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index 57399d7cf8b7..1bf927e2bfac 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
>> u64 idle_exits;
>> u64 cpucfg_exits;
>> u64 signal_exits;
>> + u64 hvcl_exits;
>> };
>>
>> #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
>> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
>> index 41200e922a82..a25a84e372b9 100644
>> --- a/arch/loongarch/include/asm/kvm_para.h
>> +++ b/arch/loongarch/include/asm/kvm_para.h
>> @@ -9,6 +9,10 @@
>> #define HYPERVISOR_VENDOR_SHIFT 8
>> #define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
>>
>> +#define KVM_HC_CODE_SERVICE 0
>> +#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
>> +#define KVM_HC_FUNC_IPI 1
>> +
>> /*
>> * LoongArch hypcall return code
>> */
>> @@ -16,6 +20,126 @@
>> #define KVM_HC_INVALID_CODE -1UL
>> #define KVM_HC_INVALID_PARAMETER -2UL
>>
>> +/*
>> + * Hypercalls interface for KVM hypervisor
>> + *
>> + * a0: function identifier
>> + * a1-a6: args
>> + * Return value will be placed in v0.
>> + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
>> + */
>> +static __always_inline long kvm_hypercall(u64 fid)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall2(u64 fid,
>> + unsigned long arg0, unsigned long arg1)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1), "r" (a2)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall3(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall4(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> + unsigned long arg3)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> + register unsigned long a4 asm("a4") = arg3;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long kvm_hypercall5(u64 fid,
>> + unsigned long arg0, unsigned long arg1, unsigned long arg2,
>> + unsigned long arg3, unsigned long arg4)
>> +{
>> + register long ret asm("v0");
>> + register unsigned long fun asm("a0") = fid;
>> + register unsigned long a1 asm("a1") = arg0;
>> + register unsigned long a2 asm("a2") = arg1;
>> + register unsigned long a3 asm("a3") = arg2;
>> + register unsigned long a4 asm("a4") = arg3;
>> + register unsigned long a5 asm("a5") = arg4;
>> +
>> + __asm__ __volatile__(
>> + "hvcl "__stringify(KVM_HC_SERVICE)
>> + : "=r" (ret)
>> + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
>> + : "memory"
>> + );
>> +
>> + return ret;
>> +}
>> +
>> +
>> static inline unsigned int kvm_arch_para_features(void)
>> {
>> return 0;
>> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
>> index a1d22e8b6f94..0ad36704cb4b 100644
>> --- a/arch/loongarch/include/asm/loongarch.h
>> +++ b/arch/loongarch/include/asm/loongarch.h
>> @@ -167,6 +167,7 @@
>> #define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
>> #define KVM_SIGNATURE "KVM\0"
>> #define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
>> +#define KVM_FEATURE_PV_IPI BIT(1)
>>
>> #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
>> index 1b58f7c3eed9..b5bd298c981f 100644
>> --- a/arch/loongarch/kernel/irq.c
>> +++ b/arch/loongarch/kernel/irq.c
>> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
>> per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
>> }
>>
>> - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>> }
>> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
>> index 21d01d05791a..b840a004995a 100644
>> --- a/arch/loongarch/kernel/paravirt.c
>> +++ b/arch/loongarch/kernel/paravirt.c
>> @@ -1,6 +1,7 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/export.h>
>> #include <linux/types.h>
>> +#include <linux/interrupt.h>
>> #include <linux/jump_label.h>
>> #include <linux/kvm_para.h>
>> #include <asm/paravirt.h>
>> @@ -16,6 +17,104 @@ static u64 native_steal_clock(int cpu)
>>
>> DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>>
>> +#ifdef CONFIG_SMP
>> +static void pv_send_ipi_single(int cpu, unsigned int action)
>> +{
>> + unsigned int min, old;
>> + unsigned long bitmap = 0;
>> + irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
>> +
>> + action = BIT(action);
>> + old = atomic_fetch_or(action, &info->messages);
>> + if (old == 0) {
>> + min = cpu_logical_map(cpu);
>> + bitmap = 1;
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, bitmap, 0, min);
>> + }
>> +}
>> +
>> +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
>> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
>> +{
>> + unsigned int cpu, i, min = 0, max = 0, old;
>> + __uint128_t bitmap = 0;
>> + irq_cpustat_t *info;
>> +
>> + if (cpumask_empty(mask))
>> + return;
>> +
>> + action = BIT(action);
>> + for_each_cpu(i, mask) {
>> + info = &per_cpu(irq_stat, i);
>> + old = atomic_fetch_or(action, &info->messages);
>> + if (old)
>> + continue;
>> +
>> + cpu = cpu_logical_map(i);
>> + if (!bitmap) {
>> + min = max = cpu;
>> + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
>> + max = cpu > max ? cpu : max;
>> + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
>> + bitmap <<= min - cpu;
>> + min = cpu;
>> + } else {
>> + /*
>> + * Physical cpuid is sorted in ascending order, so
>> + * for the next mask calculation, send IPI here
>> + * directly and skip the remaining cpus
>> + */
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
>> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> + min = max = cpu;
>> + bitmap = 0;
>> + }
>> + __set_bit(cpu - min, (unsigned long *)&bitmap);
>> + }
>> +
>> + if (bitmap)
>> + kvm_hypercall3(KVM_HC_FUNC_IPI, (unsigned long)bitmap,
>> + (unsigned long)(bitmap >> BITS_PER_LONG), min);
>> +}
>> +
>> +static irqreturn_t loongson_do_swi(int irq, void *dev)
>> +{
>> + irq_cpustat_t *info;
>> + long action;
>> +
>> + clear_csr_estat(1 << INT_SWI0);
>> +
>> + info = this_cpu_ptr(&irq_stat);
>> + do {
>> + action = atomic_xchg(&info->messages, 0);
>> + if (action & SMP_CALL_FUNCTION) {
>> + generic_smp_call_function_interrupt();
>> + info->ipi_irqs[IPI_CALL_FUNCTION]++;
>> + }
>> +
>> + if (action & SMP_RESCHEDULE) {
>> + scheduler_ipi();
>> + info->ipi_irqs[IPI_RESCHEDULE]++;
>> + }
>> + } while (action);
>> +
>> + return IRQ_HANDLED;
>> +}
>> +
>> +static void pv_ipi_init(void)
>> +{
>> + int r, swi0;
>> +
>> + swi0 = get_percpu_irq(INT_SWI0);
>> + if (swi0 < 0)
>> + panic("SWI0 IRQ mapping failed\n");
>> + irq_set_percpu_devid(swi0);
>> + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
>> + if (r < 0)
>> + panic("SWI0 IRQ request failed\n");
>> +}
>> +#endif
>> +
>> static bool kvm_para_available(void)
>> {
>> static int hypervisor_type;
>> @@ -32,10 +131,24 @@ static bool kvm_para_available(void)
>>
>> int __init pv_guest_init(void)
>> {
>> + int feature;
>> +
>> if (!cpu_has_hypervisor)
>> return 0;
>> if (!kvm_para_available())
>> return 0;
>>
>> + /*
>> + * check whether KVM hypervisor supports pv_ipi or not
>> + */
>> +#ifdef CONFIG_SMP
>> + feature = read_cpucfg(CPUCFG_KVM_FEATURE);
>> + if (feature & KVM_FEATURE_PV_IPI) {
>> + smp_ops.call_func_single_ipi = pv_send_ipi_single;
>> + smp_ops.call_func_ipi = pv_send_ipi_mask;
> From this patch I found that these functions are supposed to send any
> type of IPI, while the naming is call_func_xxx. Maybe send_ipi_single
> and send_ipi_mask are more accurate?
Yes, you are right. I will modify this in the next version of the patch.

Regards
Bibo Mao

>
> Huacai
>
>> + smp_ops.ipi_init = pv_ipi_init;
>> + }
>> +#endif
>> +
>> return 1;
>> }
>> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
>> index 46735ba49815..57b5706cedb9 100644
>> --- a/arch/loongarch/kernel/smp.c
>> +++ b/arch/loongarch/kernel/smp.c
>> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>> void loongson_init_secondary(void)
>> {
>> unsigned int cpu = smp_processor_id();
>> - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>> ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>>
>> change_csr_ecfg(ECFG0_IM, imask);
>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>> index f4e4df05f578..189b70bad825 100644
>> --- a/arch/loongarch/kvm/exit.c
>> +++ b/arch/loongarch/kvm/exit.c
>> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>> case CPUCFG_KVM_SIG:
>> vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>> break;
>> + case CPUCFG_KVM_FEATURE:
>> + vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
>> + break;
>> default:
>> vcpu->arch.gprs[rd] = 0;
>> break;
>> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
>> return RESUME_GUEST;
>> }
>>
>> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
>> +{
>> + unsigned long ipi_bitmap;
>> + unsigned int min, cpu, i;
>> + struct kvm_vcpu *dest;
>> +
>> + min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
>> + for (i = 0; i < 2; i++) {
>> + ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
>> + if (!ipi_bitmap)
>> + continue;
>> +
>> + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
>> + while (cpu < BITS_PER_LONG) {
>> + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
>> + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
>> + cpu + 1);
>> + if (!dest)
>> + continue;
>> +
>> + /*
>> + * Send SWI0 to dest vcpu to emulate IPI interrupt
>> + */
>> + kvm_queue_irq(dest, INT_SWI0);
>> + kvm_vcpu_kick(dest);
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * Hypercall emulation always returns to the guest; the caller should check retval.
>> + */
>> +static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
>> +{
>> + unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
>> + long ret;
>> +
>> + switch (func) {
>> + case KVM_HC_FUNC_IPI:
>> + kvm_pv_send_ipi(vcpu);
>> + ret = KVM_HC_STATUS_SUCCESS;
>> + break;
>> + default:
>> + ret = KVM_HC_INVALID_CODE;
>> + break;
>> + };
>> +
>> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
>> +}
>> +
>> static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
>> {
>> + larch_inst inst;
>> + unsigned int code;
>> +
>> + inst.word = vcpu->arch.badi;
>> + code = inst.reg0i15_format.immediate;
>> update_pc(&vcpu->arch);
>>
>> - /* Treat it as noop intruction, only set return value */
>> - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
>> + switch (code) {
>> + case KVM_HC_SERVICE:
>> + vcpu->stat.hvcl_exits++;
>> + kvm_handle_pv_hcall(vcpu);
>> + break;
>> + default:
>> + /* Treat it as noop instruction, only set return value */
>> + vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
>> + break;
>> + }
>> +
>> return RESUME_GUEST;
>> }
>>
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 97ca9c7160e6..80e05ba9b48d 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
>> STATS_DESC_COUNTER(VCPU, idle_exits),
>> STATS_DESC_COUNTER(VCPU, cpucfg_exits),
>> STATS_DESC_COUNTER(VCPU, signal_exits),
>> + STATS_DESC_COUNTER(VCPU, hvcl_exits)
>> };
>>
>> const struct kvm_stats_header kvm_vcpu_stats_header = {
>> --
>> 2.39.3
>>
>>

2024-01-30 02:53:54

by maobibo

[permalink] [raw]

Subject: Re: [PATCH v3 5/6] LoongArch: KVM: Add physical cpuid map support

On 2024/1/29 下午9:11, Huacai Chen wrote:
> Hi, Bibo,
>
> Without this patch I can also create a SMP VM, so what problem does
> this patch want to solve?
With the ipi irqchip, the physical cpuid rather than the logical cpuid is
used to identify the dest cpu. If the ipi device is emulated on the qemu
side, find_cpu_by_archid in hw/intc/loongarch_ipi.c is used to get the
dest vcpu.

With the hypercall method, ipi is emulated on the kvm kernel side, so the
same physical cpuid lookup logic is needed there. The function
kvm_get_vcpu_by_cpuid provides it and is used by the pv_ipi backend.

Regards
Bibo Mao

>
> Huacai
>
> On Mon, Jan 22, 2024 at 6:03 PM Bibo Mao <[email protected]> wrote:
>>
>> Physical cpuid is used for irq routing by irqchips such as the ipi/msi/
>> extioi interrupt controllers. Physical cpuid is stored in the CSR
>> register LOONGARCH_CSR_CPUID, and it can not be changed once the vcpu is
>> created. Since different irqchips have different size definitions
>> for physical cpuid, KVM uses the smallest cpuid size, the one from
>> extioi, and the max cpuid size is defined as 256.
>>
>> Signed-off-by: Bibo Mao <[email protected]>
>> ---
>> arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
>> arch/loongarch/include/asm/kvm_vcpu.h | 1 +
>> arch/loongarch/kvm/vcpu.c | 93 ++++++++++++++++++++++++++-
>> arch/loongarch/kvm/vm.c | 11 ++++
>> 4 files changed, 130 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index 2d62f7b0d377..57399d7cf8b7 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>>
>> #define MAX_PGTABLE_LEVELS 4
>>
>> +/*
>> + * Physical cpu id is used for interrupt routing, there are different
>> + * definitions about physical cpuid on different hardwares.
>> + * For LOONGARCH_CSR_CPUID register, max cpuid size is 512
>> + * For IPI HW, max dest CPUID size is 1024
>> + * For extioi interrupt controller, max dest CPUID size is 256
>> + * For MSI interrupt controller, max supported CPUID size is 65536
>> + *
>> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
>> + * it will be expanded to 4096, including 16 packages at most. And every
>> + * package supports at most 256 vcpus
>> + */
>> +#define KVM_MAX_PHYID 256
>> +
>> +struct kvm_phyid_info {
>> + struct kvm_vcpu *vcpu;
>> + bool enabled;
>> +};
>> +
>> +struct kvm_phyid_map {
>> + int max_phyid;
>> + struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
>> +};
>> +
>> struct kvm_arch {
>> /* Guest physical mm */
>> kvm_pte_t *pgd;
>> @@ -71,6 +95,8 @@ struct kvm_arch {
>> unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
>> unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
>> unsigned int root_level;
>> + struct mutex phyid_map_lock;
>> + struct kvm_phyid_map *phyid_map;
>>
>> s64 time_offset;
>> struct kvm_context __percpu *vmcs;
>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
>> index e71ceb88f29e..2402129ee955 100644
>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
>> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
>> void kvm_restore_timer(struct kvm_vcpu *vcpu);
>>
>> int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>>
>> /*
>> * Loongarch KVM guest interrupt handling
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 27701991886d..97ca9c7160e6 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
>> return 0;
>> }
>>
>> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
>> +{
>> + int cpuid;
>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>> + struct kvm_phyid_map *map;
>> +
>> + if (val >= KVM_MAX_PHYID)
>> + return -EINVAL;
>> +
>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>> + map = vcpu->kvm->arch.phyid_map;
>> + mutex_lock(&vcpu->kvm->arch.phyid_map_lock);
>> + if (map->phys_map[cpuid].enabled) {
>> + /*
>> + * Cpuid is already set before
>> + * Forbid changing different cpuid at runtime
>> + */
>> + if (cpuid != val) {
>> + /*
>> + * Cpuid 0 is initial value for vcpu, maybe invalid
>> + * unset value for vcpu
>> + */
>> + if (cpuid) {
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return -EINVAL;
>> + }
>> + } else {
>> + /* Discard duplicated cpuid set */
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return 0;
>> + }
>> + }
>> +
>> + if (map->phys_map[val].enabled) {
>> + /*
>> + * New cpuid is already set with other vcpu
>> + * Forbid sharing the same cpuid between different vcpus
>> + */
>> + if (map->phys_map[val].vcpu != vcpu) {
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return -EINVAL;
>> + }
>> +
>> + /* Discard duplicated cpuid set operation*/
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return 0;
>> + }
>> +
>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
>> + map->phys_map[val].enabled = true;
>> + map->phys_map[val].vcpu = vcpu;
>> + if (map->max_phyid < val)
>> + map->max_phyid = val;
>> + mutex_unlock(&vcpu->kvm->arch.phyid_map_lock);
>> + return 0;
>> +}
>> +
>> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
>> +{
>> + struct kvm_phyid_map *map;
>> +
>> + if (cpuid >= KVM_MAX_PHYID)
>> + return NULL;
>> +
>> + map = kvm->arch.phyid_map;
>> + if (map->phys_map[cpuid].enabled)
>> + return map->phys_map[cpuid].vcpu;
>> +
>> + return NULL;
>> +}
>> +
>> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
>> +{
>> + int cpuid;
>> + struct loongarch_csrs *csr = vcpu->arch.csr;
>> + struct kvm_phyid_map *map;
>> +
>> + map = vcpu->kvm->arch.phyid_map;
>> + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
>> + if (cpuid >= KVM_MAX_PHYID)
>> + return;
>> +
>> + if (map->phys_map[cpuid].enabled) {
>> + map->phys_map[cpuid].vcpu = NULL;
>> + map->phys_map[cpuid].enabled = false;
>> + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
>> + }
>> +}
>> +
>> static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>> {
>> int ret = 0, gintc;
>> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>> kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>>
>> return ret;
>> - }
>> + } else if (id == LOONGARCH_CSR_CPUID)
>> + return kvm_set_cpuid(vcpu, val);
>>
>> kvm_write_sw_gcsr(csr, id, val);
>>
>> @@ -925,6 +1015,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>> hrtimer_cancel(&vcpu->arch.swtimer);
>> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>> kfree(vcpu->arch.csr);
>> + kvm_drop_cpuid(vcpu);
>>
>> /*
>> * If the vCPU is freed and reused as another vCPU, we don't want the
>> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
>> index 0a37f6fa8f2d..6fd5916ebef3 100644
>> --- a/arch/loongarch/kvm/vm.c
>> +++ b/arch/loongarch/kvm/vm.c
>> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>> if (!kvm->arch.pgd)
>> return -ENOMEM;
>>
>> + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
>> + GFP_KERNEL_ACCOUNT);
>> + if (!kvm->arch.phyid_map) {
>> + free_page((unsigned long)kvm->arch.pgd);
>> + kvm->arch.pgd = NULL;
>> + return -ENOMEM;
>> + }
>> +
>> kvm_init_vmcs(kvm);
>> kvm->arch.gpa_size = BIT(cpu_vabits - 1);
>> kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
>> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>> for (i = 0; i <= kvm->arch.root_level; i++)
>> kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>>
>> + mutex_init(&kvm->arch.phyid_map_lock);
>> return 0;
>> }
>>
>> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>> {
>> kvm_destroy_vcpus(kvm);
>> free_page((unsigned long)kvm->arch.pgd);
>> + kvfree(kvm->arch.phyid_map);
>> kvm->arch.pgd = NULL;
>> + kvm->arch.phyid_map = NULL;
>> }
>>
>> int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> --
>> 2.39.3
>>