Implement the LoongArch KVM module init and module exit interfaces, using
the kvm context to save the vpid info and the vcpu world-switch interface
pointer.
Signed-off-by: Tianrui Zhao <[email protected]>
---
arch/loongarch/kvm/main.c | 81 +++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)
create mode 100644 arch/loongarch/kvm/main.c
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
new file mode 100644
index 000000000..d7969d02a
--- /dev/null
+++ b/arch/loongarch/kvm/main.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+#include <asm/cacheflush.h>
+
+static struct kvm_context __percpu *vmcs;
+
+int kvm_arch_init(void *opaque)
+{
+ struct kvm_context *context;
+ unsigned long vpid_mask;
+ int cpu, order;
+ void *addr;
+
+ vmcs = alloc_percpu(struct kvm_context);
+ if (!vmcs) {
+ pr_err("kvm: failed to allocate percpu kvm_context\n");
+ return -ENOMEM;
+ }
+
+ order = get_order(kvm_vector_size + kvm_enter_guest_size);
+ addr = (void *)__get_free_pages(GFP_KERNEL, order);
+ if (!addr) {
+ free_percpu(vmcs);
+ return -ENOMEM;
+ }
+
+ memcpy(addr, kvm_vector_entry, kvm_vector_size);
+ memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size);
+ flush_icache_range((unsigned long)addr, (unsigned long)addr +
+ kvm_vector_size + kvm_enter_guest_size);
+
+ vpid_mask = read_csr_gstat();
+ vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
+ if (vpid_mask)
+ vpid_mask = GENMASK(vpid_mask - 1, 0);
+
+ for_each_possible_cpu(cpu) {
+ context = per_cpu_ptr(vmcs, cpu);
+ context->vpid_mask = vpid_mask;
+ context->vpid_cache = context->vpid_mask + 1;
+ context->last_vcpu = NULL;
+ context->kvm_eentry = addr;
+ context->kvm_enter_guest = addr + kvm_vector_size;
+ context->page_order = order;
+ }
+
+ _kvm_init_fault();
+
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+ struct kvm_context *context = per_cpu_ptr(vmcs, 0);
+
+ free_pages((unsigned long)context->kvm_eentry, context->page_order);
+ free_percpu(vmcs);
+}
+
+static int kvm_loongarch_init(void)
+{
+ if (!cpu_has_lvz)
+ return 0;
+
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+
+static void kvm_loongarch_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(kvm_loongarch_init);
+module_exit(kvm_loongarch_exit);
--
2.31.1
On 2/20/23 07:57, Tianrui Zhao wrote:
> + order = get_order(kvm_vector_size + kvm_enter_guest_size);
> + addr = (void *)__get_free_pages(GFP_KERNEL, order);
> + if (!addr) {
> + free_percpu(vmcs);
> + return -ENOMEM;
> + }
> +
> + memcpy(addr, kvm_vector_entry, kvm_vector_size);
> + memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size);
> + flush_icache_range((unsigned long)addr, (unsigned long)addr +
> + kvm_vector_size + kvm_enter_guest_size);
> +
> + vpid_mask = read_csr_gstat();
> + vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
> + if (vpid_mask)
> + vpid_mask = GENMASK(vpid_mask - 1, 0);
> +
> + for_each_possible_cpu(cpu) {
> + context = per_cpu_ptr(vmcs, cpu);
> + context->vpid_mask = vpid_mask;
> + context->vpid_cache = context->vpid_mask + 1;
> + context->last_vcpu = NULL;
> + context->kvm_eentry = addr;
> + context->kvm_enter_guest = addr + kvm_vector_size;
> + context->page_order = order;
> + }
A lot of these variables are constant across all pCPUs, any reason to
have them in a per-CPU variable? Likewise, since they are all the same
as the constant global vmcs variable, why make them part of struct
kvm_context instead of just making them globals?
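Something like this, for instance (an untested sketch; the names are only
suggestions):

/* Constant after init and identical on every pCPU -- plain globals. */
static unsigned long vpid_mask;         /* computed once from CSR.GSTAT */
static void *kvm_eentry;                /* copy of kvm_vector_entry */
static void *kvm_enter_guest_ptr;       /* copy of kvm_enter_guest */
static int vector_page_order;           /* page order of the copies */

Only vpid_cache and last_vcpu actually differ per CPU, so only those two
would need to stay in struct kvm_context.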
Also, why does the world switch code need a copy?
Paolo
On 2023/02/21 01:46, Paolo Bonzini wrote:
> On 2/20/23 07:57, Tianrui Zhao wrote:
>> + order = get_order(kvm_vector_size + kvm_enter_guest_size);
>> + addr = (void *)__get_free_pages(GFP_KERNEL, order);
>> + if (!addr) {
>> + free_percpu(vmcs);
>> + return -ENOMEM;
>> + }
>> +
>> + memcpy(addr, kvm_vector_entry, kvm_vector_size);
>> + memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size);
>> + flush_icache_range((unsigned long)addr, (unsigned long)addr +
>> + kvm_vector_size + kvm_enter_guest_size);
>> +
>> + vpid_mask = read_csr_gstat();
>> + vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
>> + if (vpid_mask)
>> + vpid_mask = GENMASK(vpid_mask - 1, 0);
>> +
>> + for_each_possible_cpu(cpu) {
>> + context = per_cpu_ptr(vmcs, cpu);
>> + context->vpid_mask = vpid_mask;
>> + context->vpid_cache = context->vpid_mask + 1;
>> + context->last_vcpu = NULL;
>> + context->kvm_eentry = addr;
>> + context->kvm_enter_guest = addr + kvm_vector_size;
>> + context->page_order = order;
>> + }
>
> A lot of these variables are constant across all pCPUs, any reason to
> have them in a per-CPU variable? Likewise, since they are all the
> same as the constant global vmcs variable, why make them part of
> struct kvm_context instead of just making them globals?
OK, thanks. It is more appropriate to use global variables to save that
information.
Thanks
Tianrui Zhao
>
> Also, why does the world switch code need a copy?
>
> Paolo
On 2023/2/21 01:46, Paolo Bonzini wrote:
> On 2/20/23 07:57, Tianrui Zhao wrote:
>> + order = get_order(kvm_vector_size + kvm_enter_guest_size);
>> + addr = (void *)__get_free_pages(GFP_KERNEL, order);
>> + if (!addr) {
>> + free_percpu(vmcs);
>> + return -ENOMEM;
>> + }
>> +
>> + memcpy(addr, kvm_vector_entry, kvm_vector_size);
>> + memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size);
>> + flush_icache_range((unsigned long)addr, (unsigned long)addr +
>> + kvm_vector_size + kvm_enter_guest_size);
>> +
>> + vpid_mask = read_csr_gstat();
>> + vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
>> + if (vpid_mask)
>> + vpid_mask = GENMASK(vpid_mask - 1, 0);
>> +
>> + for_each_possible_cpu(cpu) {
>> + context = per_cpu_ptr(vmcs, cpu);
>> + context->vpid_mask = vpid_mask;
>> + context->vpid_cache = context->vpid_mask + 1;
>> + context->last_vcpu = NULL;
>> + context->kvm_eentry = addr;
>> + context->kvm_enter_guest = addr + kvm_vector_size;
>> + context->page_order = order;
>> + }
>
> A lot of these variables are constant across all pCPUs, any reason to have them in a per-CPU variable? Likewise, since they are all the same as the constant global vmcs variable, why make them part of struct kvm_context instead of just making them globals?
>
Paolo,
Thanks for reviewing these patches.
Originally we thought that global variables would make the C files depend on
each other, and that global variables are no faster than percpu ones, so we
removed the global variables. We are OK with making them globals.
> Also, why does the world switch code need a copy?
There will be a problem in the world switch code if a page fault re-enters,
since the pgd register is shared between the root kernel and the kvm hypervisor.
The world switch entry needs to be in an unmapped area; it cannot be in a
tlb-mapped area.
In the future, if hardware page table walking is supported, or if there are
separate pgd registers for the root kernel and the kvm hypervisor, the copy of
the world switch code will no longer be needed.
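To make that concrete, here is an annotated copy of the kvm_arch_init()
snippet (our reading; error handling omitted, and the direct-mapped-window
interpretation is our assumption rather than something the patch states):

/*
 * The module's own .text is TLB-mapped, so the world switch code cannot
 * safely run in place while the vmid bits in CSR_GTLBC are still nonzero.
 * Pages from __get_free_pages() are reached through the kernel's unmapped
 * (direct-mapped) window with no TLB involved, hence the copy:
 */
addr = (void *)__get_free_pages(GFP_KERNEL, order);
memcpy(addr, kvm_vector_entry, kvm_vector_size);
memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size);
flush_icache_range((unsigned long)addr,
		   (unsigned long)addr + kvm_vector_size + kvm_enter_guest_size);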
Regards
Bibo, Mao
>
> Paolo
On 2/21/23 07:59, maobibo wrote:
>> Also, why does the world switch code need a copy?
> There will be a problem in the world switch code if a page fault re-enters,
> since the pgd register is shared between the root kernel and the kvm hypervisor.
> The world switch entry needs to be in an unmapped area; it cannot be in a
> tlb-mapped area.
So if I understand correctly the processor is in direct address
translation mode until the "csrwr t0, LOONGARCH_CSR_CRMD" instruction.
Where does it leave paged mode?
Can you please also add comments to kvm_vector_entry explaining the
processor state after a VZ exception entry (interrupts, paging, ...)?
Paolo
On 2023/2/21 16:14, Paolo Bonzini wrote:
> On 2/21/23 07:59, maobibo wrote:
>>> Also, why does the world switch code need a copy?
>> There will be a problem in the world switch code if a page fault re-enters,
>> since the pgd register is shared between the root kernel and the kvm hypervisor.
>> The world switch entry needs to be in an unmapped area; it cannot be in a
>> tlb-mapped area.
>
> So if I understand correctly the processor is in direct address translation mode until the "csrwr t0, LOONGARCH_CSR_CRMD" instruction. Where does it leave paged mode?
The processor is still in paged mode during the world switch context. For example,
when a vm exits from guest mode to root mode, it executes the world switch code
from kvm_vector_entry: the PC register points to an HVA address, but the vmid in
LOONGARCH_CSR_GTLBC has not yet been cleared to the root-mode value. If a page
fault exception occurs, the hardware treats it as an exception on the GPA --> HPA
translation rather than the HVA --> HPA translation, since the vmid info in
CSR_GTLBC is not zero.
In paged mode there are two kinds of addresses: unmapped addresses and
tlb-mapped addresses. An unmapped address has only a cacheable/uncacheable
attribute, not RWX attributes, and there is no tlb handling for it.
For simplicity, an unmapped address can be treated as a window-filtered address.
The processor is fully in root mode only after the following piece of code is
executed during the world switch context; then the vmid is zero and the PC
points to an HVA.
ori t0, zero, CSR_GSTAT_PVM
csrxchg zero, t0, LOONGARCH_CSR_GSTAT
/* Clear GTLBC.TGID field */
csrrd t0, LOONGARCH_CSR_GTLBC
bstrins.w t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
csrwr t0, LOONGARCH_CSR_GTLBC
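Rendered as C-like pseudocode for clarity (ours; csr_read64/csr_write64/
csr_xchg64 are the usual LoongArch CSR helpers, and CSR_GTLBC_TGID is assumed
to be the field mask matching the bstrins.w bounds above):

/* Leave guest mode completely: clear GSTAT.PVM, then the guest vmid. */
csr_xchg64(0, CSR_GSTAT_PVM, LOONGARCH_CSR_GSTAT);    /* GSTAT.PVM = 0 */
unsigned long gtlbc = csr_read64(LOONGARCH_CSR_GTLBC);
gtlbc &= ~CSR_GTLBC_TGID;                             /* GTLBC.TGID = 0 */
csr_write64(gtlbc, LOONGARCH_CSR_GTLBC);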
>
> Can you please also add comments to kvm_vector_entry explaining the processor state after a VZ exception entry (interrupts, paging, ...)?
Yeap, we will add more comments about these critical exception entry.
Regards
Bibo, Mao
>
> Paolo
On 2023/2/21 18:18, maobibo wrote:
>
>
> On 2023/2/21 16:14, Paolo Bonzini wrote:
>> On 2/21/23 07:59, maobibo wrote:
>>>> Also, why does the world switch code need a copy?
>>> There will be a problem in the world switch code if a page fault re-enters,
>>> since the pgd register is shared between the root kernel and the kvm hypervisor.
>>> The world switch entry needs to be in an unmapped area; it cannot be in a
>>> tlb-mapped area.
>>
>> So if I understand correctly the processor is in direct address translation mode until the "csrwr t0, LOONGARCH_CSR_CRMD" instruction. Where does it leave paged mode?
> The processor is still in paged mode during the world switch context. For example,
> when a vm exits from guest mode to root mode, it executes the world switch code
> from kvm_vector_entry: the PC register points to an HVA address, but the vmid in
> LOONGARCH_CSR_GTLBC has not yet been cleared to the root-mode value. If a page
> fault exception occurs, the hardware treats it as an exception on the GPA --> HPA
> translation rather than the HVA --> HPA translation, since the vmid info in
> CSR_GTLBC is not zero.
>
> In paged mode there are two kinds of addresses: unmapped addresses and
> tlb-mapped addresses. An unmapped address has only a cacheable/uncacheable
> attribute, not RWX attributes, and there is no tlb handling for it.
> For simplicity, an unmapped address can be treated as a window-filtered address.
>
> The processor is fully in root mode only after the following piece of code is
> executed during the world switch context; then the vmid is zero and the PC
> points to an HVA.
> ori t0, zero, CSR_GSTAT_PVM
> csrxchg zero, t0, LOONGARCH_CSR_GSTAT
> /* Clear GTLBC.TGID field */
> csrrd t0, LOONGARCH_CSR_GTLBC
> bstrins.w t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
> csrwr t0, LOONGARCH_CSR_GTLBC
AFAIK all of this is probably coming from Volume 3 of the LoongArch ISA
Manual, which is unfortunately not publicly available at the moment. For the
sake of meaningful reviews, when can we expect to get our hands on the
manuals?
--
WANG "xen0n" Xuerui
Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
On 2023/2/21 18:37, WANG Xuerui wrote:
> On 2023/2/21 18:18, maobibo wrote:
>>
>>
>> On 2023/2/21 16:14, Paolo Bonzini wrote:
>>> On 2/21/23 07:59, maobibo wrote:
>>>>> Also, why does the world switch code need a copy?
>>>> There will be a problem in the world switch code if a page fault re-enters,
>>>> since the pgd register is shared between the root kernel and the kvm hypervisor.
>>>> The world switch entry needs to be in an unmapped area; it cannot be in a
>>>> tlb-mapped area.
>>>
>>> So if I understand correctly the processor is in direct address translation mode until the "csrwr t0, LOONGARCH_CSR_CRMD" instruction. Where does it leave paged mode?
>> The processor is still in paged mode during the world switch context. For example,
>> when a vm exits from guest mode to root mode, it executes the world switch code
>> from kvm_vector_entry: the PC register points to an HVA address, but the vmid in
>> LOONGARCH_CSR_GTLBC has not yet been cleared to the root-mode value. If a page
>> fault exception occurs, the hardware treats it as an exception on the GPA --> HPA
>> translation rather than the HVA --> HPA translation, since the vmid info in
>> CSR_GTLBC is not zero.
>>
>> In paged mode there are two kinds of addresses: unmapped addresses and
>> tlb-mapped addresses. An unmapped address has only a cacheable/uncacheable
>> attribute, not RWX attributes, and there is no tlb handling for it.
>> For simplicity, an unmapped address can be treated as a window-filtered address.
>>
>> The processor is fully in root mode only after the following piece of code is
>> executed during the world switch context; then the vmid is zero and the PC
>> points to an HVA.
>> ori t0, zero, CSR_GSTAT_PVM
>> csrxchg zero, t0, LOONGARCH_CSR_GSTAT
>> /* Clear GTLBC.TGID field */
>> csrrd t0, LOONGARCH_CSR_GTLBC
>> bstrins.w t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>> csrwr t0, LOONGARCH_CSR_GTLBC
>
> AFAIK all of this is probably coming from Volume 3 of the LoongArch ISA Manual, which is unfortunately not publicly available at the moment. For the sake of meaningful reviews, when can we expect to get our hands on the manuals?
We are pushing internally to publish the virtualization manual; it would make
it convenient for software developers to review the code. However, I am not
sure about the date :(
Regards
Bibo, Mao
>
On 2023/2/21 19:39, maobibo wrote:
>
>
> On 2023/2/21 18:37, WANG Xuerui wrote:
>> On 2023/2/21 18:18, maobibo wrote:
>>>
>>>
>>> On 2023/2/21 16:14, Paolo Bonzini wrote:
>>>> On 2/21/23 07:59, maobibo wrote:
>>>>>> Also, why does the world switch code need a copy?
>>>>> There will be a problem in the world switch code if a page fault re-enters,
>>>>> since the pgd register is shared between the root kernel and the kvm hypervisor.
>>>>> The world switch entry needs to be in an unmapped area; it cannot be in a
>>>>> tlb-mapped area.
>>>>
>>>> So if I understand correctly the processor is in direct address translation mode until the "csrwr t0, LOONGARCH_CSR_CRMD" instruction. Where does it leave paged mode?
>>> The processor is still in paged mode during the world switch context. For example,
>>> when a vm exits from guest mode to root mode, it executes the world switch code
>>> from kvm_vector_entry: the PC register points to an HVA address, but the vmid in
>>> LOONGARCH_CSR_GTLBC has not yet been cleared to the root-mode value. If a page
>>> fault exception occurs, the hardware treats it as an exception on the GPA --> HPA
>>> translation rather than the HVA --> HPA translation, since the vmid info in
>>> CSR_GTLBC is not zero.
>>>
>>> In paged mode there are two kinds of addresses: unmapped addresses and
>>> tlb-mapped addresses. An unmapped address has only a cacheable/uncacheable
>>> attribute, not RWX attributes, and there is no tlb handling for it.
>>> For simplicity, an unmapped address can be treated as a window-filtered address.
>>>
>>> The processor is fully in root mode only after the following piece of code is
>>> executed during the world switch context; then the vmid is zero and the PC
>>> points to an HVA.
>>> ori t0, zero, CSR_GSTAT_PVM
>>> csrxchg zero, t0, LOONGARCH_CSR_GSTAT
>>> /* Clear GTLBC.TGID field */
>>> csrrd t0, LOONGARCH_CSR_GTLBC
>>> bstrins.w t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>>> csrwr t0, LOONGARCH_CSR_GTLBC
>>
>> AFAIK all of this is probably coming from Volume 3 of the LoongArch ISA Manual, which is unfortunately not publicly available at the moment. For the sake of meaningful reviews, when can we expect to get our hands on the manuals?
> We are pushing internally to publish the virtualization manual; it would make
> it convenient for software developers to review the code. However, I am not
> sure about the date :(
Well, that's kinda expected, but it's nice to see some progress and
certainly your open attitude to this matter is constructive. Thanks for
sharing this and looking forward to the eventual docs release then!
--
WANG "xen0n" Xuerui
Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/