From: Keqian Zhu
Date: 2020-07-02 13:56:47
Subject: [PATCH v2 2/8] KVM: arm64: Scan PTEs to sync dirty log

With hardware management of dirty state (DBM), the dirty state is
stored in the stage-2 PTEs themselves, so we have to scan all PTEs to
sync the dirty log into the memslot dirty bitmap.
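
How hardware DBM encodes the dirty state: once VTCR_EL2.HD is set and
a stage-2 PTE carries the DBM bit, the CPU services the first write to
the page by setting S2AP[1] (write permission) instead of raising a
permission fault, so a valid, writable PTE implies a dirty page; this
is what the !kvm_s2pte_readonly() test below relies on. As a minimal
illustrative sketch (not part of this patch; the helper name
s2pte_hw_dirty is made up here, PTE_S2_RDWR is the existing arm64
encoding of S2AP[1:0] == 0b11):

  static inline bool s2pte_hw_dirty(pte_t pte)
  {
  	/* valid leaf whose S2AP grants write => hw marked it dirty */
  	return pte_valid(pte) &&
  	       ((pte_val(pte) & PTE_S2_RDWR) == PTE_S2_RDWR);
  }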

Signed-off-by: Keqian Zhu <[email protected]>
Signed-off-by: Peng Liang <[email protected]>
---
arch/arm64/include/asm/kvm_host.h | 1 +
arch/arm64/include/asm/kvm_mmu.h | 13 +++
arch/arm64/kvm/arm.c | 3 +-
arch/arm64/kvm/mmu.c | 142 ++++++++++++++++++++++++++++++
4 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c3e6fcc664b1..86b9c210ba43 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -480,6 +480,7 @@ u64 __kvm_call_hyp(void *hypfn, ...);

void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+void kvm_mmu_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);

int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 2700442b0f75..4c12b7ad8ae8 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -269,6 +269,19 @@ static inline bool kvm_s2pud_young(pud_t pud)
return pud_young(pud);
}

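+/*
+ * ID_AA64MMFR1_EL1.HADBS: 0b0001 means hw-managed Access flag only,
+ * 0b0010 means hw-managed Access flag plus dirty state, which is what
+ * hw DBM needs.
+ */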
+static inline bool arm_mmu_hw_dbm_supported(void)
+{
+ u8 hadbs = (read_sysreg(id_aa64mmfr1_el1) >>
+ ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
+
+ return hadbs == 0x2;
+}
+
+static inline bool kvm_mmu_hw_dbm_enabled(struct kvm *kvm)
+{
+ return arm_mmu_hw_dbm_supported() && !!(kvm->arch.vtcr & VTCR_EL2_HD);
+}
+
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)

#ifdef __PAGETABLE_PMD_FOLDED
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 90cb90561446..fefa5406e037 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1199,7 +1199,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
-
+ if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && kvm_mmu_hw_dbm_enabled(kvm))
+ kvm_mmu_sync_dirty_log(kvm, memslot);
}

void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 8c0035cab6b6..b3cb8b6da4c2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2411,6 +2411,148 @@ int kvm_mmu_init(void)
return err;
}

+#ifdef CONFIG_ARM64_HW_AFDBM
+/**
+ * stage2_sync_dirty_log_ptes() - synchronize dirty log from PMD range
+ * @kvm: The KVM pointer
+ * @pmd: pointer to pmd entry
+ * @addr: range start address
+ * @end: range end address
+ */
+static void stage2_sync_dirty_log_ptes(struct kvm *kvm, pmd_t *pmd,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pte_t *pte;
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
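+ /* a writable PTE means hw DBM set S2AP[1]: the page is dirty */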
+ if (!pte_none(*pte) && !kvm_s2pte_readonly(pte))
+ mark_page_dirty(kvm, addr >> PAGE_SHIFT);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+/**
+ * stage2_sync_dirty_log_pmds() - synchronize dirty log from PUD range
+ * @kvm: The KVM pointer
+ * @pud: pointer to pud entry
+ * @addr: range start address
+ * @end: range end address
+ */
+static void stage2_sync_dirty_log_pmds(struct kvm *kvm, pud_t *pud,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pmd_t *pmd;
+ phys_addr_t next;
+
+ pmd = stage2_pmd_offset(kvm, pud, addr);
+ do {
+ next = stage2_pmd_addr_end(kvm, addr, end);
+ if (!pmd_none(*pmd) && !pmd_thp_or_huge(*pmd))
+ stage2_sync_dirty_log_ptes(kvm, pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_sync_dirty_log_puds() - synchronize dirty log from P4D range
+ * @kvm: The KVM pointer
+ * @p4d: pointer to p4d entry
+ * @addr: range start address
+ * @end: range end address
+ */
+static void stage2_sync_dirty_log_puds(struct kvm *kvm, p4d_t *p4d,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pud_t *pud;
+ phys_addr_t next;
+
+ pud = stage2_pud_offset(kvm, p4d, addr);
+ do {
+ next = stage2_pud_addr_end(kvm, addr, end);
+ if (!stage2_pud_none(kvm, *pud) && !stage2_pud_huge(kvm, *pud))
+ stage2_sync_dirty_log_pmds(kvm, pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+/**
+ * stage2_sync_dirty_log_p4ds() - synchronize dirty log from PGD range
+ * @kvm: The KVM pointer
+ * @pgd: pointer to pgd entry
+ * @addr: range start address
+ * @end: range end address
+ */
+static void stage2_sync_dirty_log_p4ds(struct kvm *kvm, pgd_t *pgd,
+ phys_addr_t addr, phys_addr_t end)
+{
+ p4d_t *p4d;
+ phys_addr_t next;
+
+ p4d = stage2_p4d_offset(kvm, pgd, addr);
+ do {
+ next = stage2_p4d_addr_end(kvm, addr, end);
+ if (!stage2_p4d_none(kvm, *p4d))
+ stage2_sync_dirty_log_puds(kvm, p4d, addr, next);
+ } while (p4d++, addr = next, addr != end);
+}
+
+/**
+ * stage2_sync_dirty_log_range() - synchronize dirty log from stage2 memory
+ * region range
+ * @kvm: The KVM pointer
+ * @addr: Start address of range
+ * @end: End address of range
+ */
+static void stage2_sync_dirty_log_range(struct kvm *kvm, phys_addr_t addr,
+ phys_addr_t end)
+{
+ pgd_t *pgd;
+ phys_addr_t next;
+
+ pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ do {
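+ /*
+ * Release mmu_lock periodically while walking a large range; if
+ * the lock was dropped and the stage2 pgd was freed under us,
+ * bail out.
+ */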
+ cond_resched_lock(&kvm->mmu_lock);
+ if (!READ_ONCE(kvm->arch.pgd))
+ break;
+ next = stage2_pgd_addr_end(kvm, addr, end);
+ if (stage2_pgd_present(kvm, *pgd))
+ stage2_sync_dirty_log_p4ds(kvm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * kvm_mmu_sync_dirty_log() - synchronize dirty log from stage2 PTEs for
+ * memory slot
+ * @kvm: The KVM pointer
+ * @memslot: The memory slot to synchronize the dirty log for
+ *
+ * Called to synchronize the dirty log (as marked by hardware) when
+ * userspace issues KVM_GET_DIRTY_LOG for a memory region. After this
+ * function returns, all dirty state recorded so far has been collected
+ * into the memslot dirty_bitmap (hardware may keep modifying the page
+ * tables while this routine runs, so the result is only complete when
+ * the guest is stopped, but that is fine: no dirty log is lost in the
+ * end). Afterwards the dirty_bitmap can be copied to userspace.
+ *
+ * Acquires kvm->mmu_lock. Called with the kvm->slots_lock mutex held,
+ * which serializes operations on VM memory regions.
+ */
+void kvm_mmu_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+ int srcu_idx;
+
+ if (WARN_ON_ONCE(!memslot->dirty_bitmap))
+ return;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ stage2_sync_dirty_log_range(kvm, start, end);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+#endif /* CONFIG_ARM64_HW_AFDBM */
+
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
--
2.19.1
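
For context (not part of the patch): this sync path is driven by
userspace fetching the dirty bitmap. A rough sketch of that side, with
hypothetical names and error handling omitted:

  struct kvm_dirty_log log = {
  	.slot = slot_id,              /* memslot to query */
  	.dirty_bitmap = bitmap_buf,   /* user-allocated bitmap buffer */
  };
  /* ends up calling kvm_arch_sync_dirty_log() on arm64 */
  ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);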