From: Lai Jiangshan <[email protected]>
Replace FNAME(invlpg) with FNAME(sync_spte).
FNAME(sync_spte), combined with the shadow page table walk, matches the
semantics of the INVLPG instruction.
Using FNAME(sync_spte) lets the invalidation of each vTLB entry share
code with the vTLB flush path (kvm_sync_page()).
Signed-off-by: Lai Jiangshan <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 -
arch/x86/kvm/mmu/mmu.c | 48 +++++++++++++++++----------
arch/x86/kvm/mmu/paging_tmpl.h | 59 ---------------------------------
3 files changed, 31 insertions(+), 77 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69b7967cd743..b80de8f53130 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -443,7 +443,6 @@ struct kvm_mmu {
struct x86_exception *exception);
int (*sync_spte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, int i);
- void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
struct kvm_mmu_root_info root;
union kvm_cpu_role cpu_role;
union kvm_mmu_page_role root_role;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f39bee1542d8..1e5f2e79863f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1061,14 +1061,6 @@ static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
}
-static bool rmap_can_add(struct kvm_vcpu *vcpu)
-{
- struct kvm_mmu_memory_cache *mc;
-
- mc = &vcpu->arch.mmu_pte_list_desc_cache;
- return kvm_mmu_memory_cache_nr_free_objects(mc);
-}
-
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_memslots *slots;
@@ -4505,7 +4497,6 @@ static void nonpaging_init_context(struct kvm_mmu *context)
context->page_fault = nonpaging_page_fault;
context->gva_to_gpa = nonpaging_gva_to_gpa;
context->sync_spte = NULL;
- context->invlpg = NULL;
}
static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
@@ -5094,7 +5085,6 @@ static void paging64_init_context(struct kvm_mmu *context)
context->page_fault = paging64_page_fault;
context->gva_to_gpa = paging64_gva_to_gpa;
context->sync_spte = paging64_sync_spte;
- context->invlpg = paging64_invlpg;
}
static void paging32_init_context(struct kvm_mmu *context)
@@ -5102,7 +5092,6 @@ static void paging32_init_context(struct kvm_mmu *context)
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
context->sync_spte = paging32_sync_spte;
- context->invlpg = paging32_invlpg;
}
static union kvm_cpu_role
@@ -5191,7 +5180,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
context->root_role.word = root_role.word;
context->page_fault = kvm_tdp_page_fault;
context->sync_spte = NULL;
- context->invlpg = NULL;
context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
@@ -5323,7 +5311,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_spte = ept_sync_spte;
- context->invlpg = ept_invlpg;
update_permission_bitmask(context, true);
context->pkru_mask = 0;
@@ -5364,7 +5351,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
* L2 page tables are never shadowed, so there is no need to sync
* SPTEs.
*/
- g_context->invlpg = NULL;
+ g_context->sync_spte = NULL;
/*
* Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
@@ -5739,6 +5726,33 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+static void __kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t gva, hpa_t root_hpa)
+{
+ struct kvm_shadow_walk_iterator iterator;
+
+ vcpu_clear_mmio_info(vcpu, gva);
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
+ struct kvm_mmu_page *sp = sptep_to_sp(iterator.sptep);
+
+ if (sp->unsync && *iterator.sptep) {
+ gfn_t gfn = kvm_mmu_page_get_gfn(sp, iterator.index);
+ int ret = mmu->sync_spte(vcpu, sp, iterator.index);
+
+ if (ret < 0)
+ mmu_page_zap_pte(vcpu->kvm, sp, iterator.sptep, NULL);
+ if (ret)
+ kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
+ }
+
+ if (!sp->unsync_children)
+ break;
+ }
+ write_unlock(&vcpu->kvm->mmu_lock);
+}
+
/* roots_to_invalidte must be some combination of the KVM_MMU_ROOT_* flags */
void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gva_t gva, ulong roots_to_invalidate)
@@ -5754,16 +5768,16 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
static_call(kvm_x86_flush_tlb_gva)(vcpu, gva);
}
- if (!mmu->invlpg)
+ if (!mmu->sync_spte)
return;
if ((roots_to_invalidate & KVM_MMU_ROOT_CURRENT) && VALID_PAGE(mmu->root.hpa))
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ __kvm_mmu_invalidate_gva(vcpu, mmu, gva, mmu->root.hpa);
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if ((roots_to_invalidate & KVM_MMU_ROOT_PREVIOUS(i)) &&
VALID_PAGE(mmu->prev_roots[i].hpa))
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ __kvm_mmu_invalidate_gva(vcpu, mmu, gva, mmu->prev_roots[i].hpa);
}
EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 3bc13b9b61d1..62aac5d7d38c 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -851,65 +851,6 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
}
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
-{
- struct kvm_shadow_walk_iterator iterator;
- struct kvm_mmu_page *sp;
- u64 old_spte;
- int level;
- u64 *sptep;
-
- vcpu_clear_mmio_info(vcpu, gva);
-
- /*
- * No need to check return value here, rmap_can_add() can
- * help us to skip pte prefetch later.
- */
- mmu_topup_memory_caches(vcpu, true);
-
- if (!VALID_PAGE(root_hpa)) {
- WARN_ON(1);
- return;
- }
-
- write_lock(&vcpu->kvm->mmu_lock);
- for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
- level = iterator.level;
- sptep = iterator.sptep;
-
- sp = sptep_to_sp(sptep);
- old_spte = *sptep;
- if (is_last_spte(old_spte, level)) {
- pt_element_t gpte;
- gpa_t pte_gpa;
-
- if (!sp->unsync)
- break;
-
- pte_gpa = FNAME(get_level1_sp_gpa)(sp);
- pte_gpa += spte_index(sptep) * sizeof(pt_element_t);
-
- mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL);
- if (is_shadow_present_pte(old_spte))
- kvm_flush_remote_tlbs_with_address(vcpu->kvm,
- sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));
-
- if (!rmap_can_add(vcpu))
- break;
-
- if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
- sizeof(pt_element_t)))
- break;
-
- FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
- }
-
- if (!sp->unsync_children)
- break;
- }
- write_unlock(&vcpu->kvm->mmu_lock);
-}
-
/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gpa_t addr, u64 access,
--
2.19.1.6.gb485710b
On Thu, Jan 05, 2023, Lai Jiangshan wrote:
> From: Lai Jiangshan <[email protected]>
>
> Replace it with FNAME(sync_spte).
>
> FNAME(sync_spte) combined with the shadow pagetable walk meets the
> semantics of the instruction INVLPG.
Please call out the differences (I assume the two aren't perfectly identical),
and explain why those differences are benign.