Implement a platform hook to do the work of restoring the direct map
entries of gmem-managed pages and transitioning the corresponding RMP
table entries back to the default shared/hypervisor-owned state.
Signed-off-by: Michael Roth <[email protected]>
---
arch/x86/kvm/Kconfig | 1 +
arch/x86/kvm/svm/sev.c | 63 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 1 +
arch/x86/kvm/svm/svm.h | 2 ++
4 files changed, 67 insertions(+)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 286b40d0b07c..32a5c37cbf88 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -125,6 +125,7 @@ config KVM_AMD_SEV
select ARCH_HAS_CC_PLATFORM
select KVM_GENERIC_PRIVATE_MEM
select HAVE_KVM_GMEM_PREPARE
+ select HAVE_KVM_GMEM_INVALIDATE
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e1f8be1df219..87d621d013a4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -4380,3 +4380,66 @@ int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
return 0;
}
+
+void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
+{
+ kvm_pfn_t pfn;
+
+ pr_debug("%s: PFN start 0x%llx PFN end 0x%llx\n", __func__, start, end);
+
+ for (pfn = start; pfn < end;) {
+ bool use_2m_update = false;
+ int rc, rmp_level;
+ bool assigned;
+
+ rc = snp_lookup_rmpentry(pfn, &assigned, &rmp_level);
+ if (rc) {
+ pr_debug_ratelimited("SEV: Failed to retrieve RMP entry for PFN 0x%llx error %d\n",
+ pfn, rc);
+ goto next_pfn;
+ }
+
+ if (!assigned)
+ goto next_pfn;
+
+ use_2m_update = IS_ALIGNED(pfn, PTRS_PER_PMD) &&
+ end >= (pfn + PTRS_PER_PMD) &&
+ rmp_level > PG_LEVEL_4K;
+
+ /*
+ * If an unaligned PFN corresponds to a 2M region assigned as a
+ * large page in he RMP table, PSMASH the region into individual
+ * 4K RMP entries before attempting to convert a 4K sub-page.
+ */
+ if (!use_2m_update && rmp_level > PG_LEVEL_4K) {
+ rc = snp_rmptable_psmash(pfn);
+ if (rc)
+ pr_err_ratelimited("SEV: Failed to PSMASH RMP entry for PFN 0x%llx error %d\n",
+ pfn, rc);
+ }
+
+ rc = rmp_make_shared(pfn, use_2m_update ? PG_LEVEL_2M : PG_LEVEL_4K);
+ if (WARN_ON_ONCE(rc)) {
+ pr_err_ratelimited("SEV: Failed to update RMP entry for PFN 0x%llx error %d\n",
+ pfn, rc);
+ goto next_pfn;
+ }
+
+ /*
+ * SEV-ES avoids host/guest cache coherency issues through
+ * WBINVD hooks issued via MMU notifiers during run-time, and
+ * KVM's VM destroy path at shutdown. Those MMU notifier events
+ * don't cover gmem since there is no requirement to map pages
+ * to a HVA in order to use them for a running guest. While the
+ * shutdown path would still likely cover things for SNP guests,
+ * userspace may also free gmem pages during run-time via
+ * hole-punching operations on the guest_memfd, so flush the
+ * cache entries for these pages before free'ing them back to
+ * the host.
+ */
+ clflush_cache_range(__va(pfn_to_hpa(pfn)),
+ use_2m_update ? PMD_SIZE : PAGE_SIZE);
+next_pfn:
+ pfn += use_2m_update ? PTRS_PER_PMD : 1;
+ }
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c099154e326a..b456906f2670 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5080,6 +5080,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.alloc_apic_backing_page = svm_alloc_apic_backing_page,
.gmem_prepare = sev_gmem_prepare,
+ .gmem_invalidate = sev_gmem_invalidate,
};
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 53618cfc2b89..3f1f6d3d3ade 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -731,6 +731,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
void sev_vcpu_unblocking(struct kvm_vcpu *vcpu);
void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu);
int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
+void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
#else
static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) {
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
@@ -751,6 +752,7 @@ static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, in
{
return 0;
}
+static inline void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) {}
#endif
--
2.25.1
On 3/29/24 23:58, Michael Roth wrote:
> + /*
> + * If an unaligned PFN corresponds to a 2M region assigned as a
> + * large page in he RMP table, PSMASH the region into individual
> + * 4K RMP entries before attempting to convert a 4K sub-page.
> + */
> + if (!use_2m_update && rmp_level > PG_LEVEL_4K) {
> + rc = snp_rmptable_psmash(pfn);
> + if (rc)
> + pr_err_ratelimited("SEV: Failed to PSMASH RMP entry for PFN 0x%llx error %d\n",
> + pfn, rc);
> + }
Ignoring the PSMASH failure is pretty scary... At this point
.free_folio cannot fail, should the psmash part of this patch be done in
kvm_gmem_invalidate_begin() before kvm_mmu_unmap_gfn_range()?
Also, can you get PSMASH_FAIL_INUSE and if so what's the best way to
address it? Should fallocate() return -EBUSY?
Thanks,
Paolo