2022-12-06 17:59:47

by Ben Gardon

Subject: [PATCH 0/7] KVM: x86/MMU: Factor rmap operations out of mmu.c

Move the basic operations for manipulating an rmap out of mmu.c, into
their own files.

This is the first step in a series of series to split the Shadow MMU out of
mmu.c. Splitting the Shadow MMU out results in about a 50% reduction in the
line count of mmu.c, from ~7K to ~3.5K lines. The rmap operations represent
only about 10% of the Shadow MMU, but they are split out first because they
are relatively self-contained.

This split may also help facilitate the addition of KUnit tests for rmap
manipulation, especially around the fiddly bit flag which differentiates a
direct pointer from a pointer to a pte_list_desc.

This effort is complementary to David Matlack's proposal to refactor
the TDP MMU into an arch-neutral implementation because it further
clarifies the distinction between the Shadow MMU and TDP MMU within x86.

This series contains no functional changes. It's just a direct movement
of code from one file to another.

Whether this rmap code should stay in its own file or get merged with
the rest of the shadow MMU code as it is factored out is open for
debate.

Ben Gardon (7):
KVM: x86/MMU: Move pte_list operations to rmap.c
KVM: x86/MMU: Move rmap_iterator to rmap.h
KVM: x86/MMU: Move gfn_to_rmap() to rmap.c
KVM: x86/MMU: Move rmap_can_add() and rmap_remove() to rmap.c
KVM: x86/MMU: Move the rmap walk iterator out of mmu.c
KVM: x86/MMU: Move rmap zap operations to rmap.c
KVM: x86/MMU: Move rmap_add() to rmap.c

arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/debugfs.c | 1 +
arch/x86/kvm/mmu/mmu.c | 438 +-------------------------------
arch/x86/kvm/mmu/mmu_internal.h | 9 +-
arch/x86/kvm/mmu/rmap.c | 368 +++++++++++++++++++++++++++
arch/x86/kvm/mmu/rmap.h | 106 ++++++++
6 files changed, 492 insertions(+), 432 deletions(-)
create mode 100644 arch/x86/kvm/mmu/rmap.c
create mode 100644 arch/x86/kvm/mmu/rmap.h

--
2.39.0.rc0.267.gcb52ba06e7-goog


2022-12-06 18:01:04

by Ben Gardon

Subject: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

In the interest of eventually splitting the Shadow MMU out of mmu.c,
start by moving some of the operations for manipulating pte_lists out of
mmu.c and into a new pair of files: rmap.c and rmap.h.

No functional change intended.

Signed-off-by: Ben Gardon <[email protected]>
---
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/debugfs.c | 1 +
arch/x86/kvm/mmu/mmu.c | 152 +-------------------------------
arch/x86/kvm/mmu/mmu_internal.h | 1 -
arch/x86/kvm/mmu/rmap.c | 141 +++++++++++++++++++++++++++++
arch/x86/kvm/mmu/rmap.h | 34 +++++++
6 files changed, 179 insertions(+), 152 deletions(-)
create mode 100644 arch/x86/kvm/mmu/rmap.c
create mode 100644 arch/x86/kvm/mmu/rmap.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 80e3fe184d17..9f766eebeddf 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
- mmu/spte.o
+ mmu/spte.o mmu/rmap.o

ifdef CONFIG_HYPERV
kvm-y += kvm_onhyperv.o
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index c1390357126a..29f692ecd6f3 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -9,6 +9,7 @@
#include "lapic.h"
#include "mmu.h"
#include "mmu/mmu_internal.h"
+#include "mmu/rmap.h"

static int vcpu_get_timer_advance_ns(void *data, u64 *val)
{
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4736d7849c60..90b3735d6064 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -26,6 +26,7 @@
#include "kvm_emulate.h"
#include "cpuid.h"
#include "spte.h"
+#include "rmap.h"

#include <linux/kvm_host.h>
#include <linux/types.h>
@@ -112,24 +113,6 @@ module_param(dbg, bool, 0644);

#include <trace/events/kvm.h>

-/* make pte_list_desc fit well in cache lines */
-#define PTE_LIST_EXT 14
-
-/*
- * Slight optimization of cacheline layout, by putting `more' and `spte_count'
- * at the start; then accessing it will only use one single cacheline for
- * either full (entries==PTE_LIST_EXT) case or entries<=6.
- */
-struct pte_list_desc {
- struct pte_list_desc *more;
- /*
- * Stores number of entries stored in the pte_list_desc. No need to be
- * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
- */
- u64 spte_count;
- u64 *sptes[PTE_LIST_EXT];
-};
-
struct kvm_shadow_walk_iterator {
u64 addr;
hpa_t shadow_addr;
@@ -155,7 +138,6 @@ struct kvm_shadow_walk_iterator {
({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
__shadow_walk_next(&(_walker), spte))

-static struct kmem_cache *pte_list_desc_cache;
struct kmem_cache *mmu_page_header_cache;
static struct percpu_counter kvm_total_used_mmu_pages;

@@ -674,11 +656,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
}

-static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
-{
- kmem_cache_free(pte_list_desc_cache, pte_list_desc);
-}
-
static bool sp_has_gptes(struct kvm_mmu_page *sp);

static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
@@ -878,111 +855,6 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
return slot;
}

-/*
- * About rmap_head encoding:
- *
- * If the bit zero of rmap_head->val is clear, then it points to the only spte
- * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
- * pte_list_desc containing more mappings.
- */
-
-/*
- * Returns the number of pointers in the rmap chain, not counting the new one.
- */
-static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
- struct kvm_rmap_head *rmap_head)
-{
- struct pte_list_desc *desc;
- int count = 0;
-
- if (!rmap_head->val) {
- rmap_printk("%p %llx 0->1\n", spte, *spte);
- rmap_head->val = (unsigned long)spte;
- } else if (!(rmap_head->val & 1)) {
- rmap_printk("%p %llx 1->many\n", spte, *spte);
- desc = kvm_mmu_memory_cache_alloc(cache);
- desc->sptes[0] = (u64 *)rmap_head->val;
- desc->sptes[1] = spte;
- desc->spte_count = 2;
- rmap_head->val = (unsigned long)desc | 1;
- ++count;
- } else {
- rmap_printk("%p %llx many->many\n", spte, *spte);
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- while (desc->spte_count == PTE_LIST_EXT) {
- count += PTE_LIST_EXT;
- if (!desc->more) {
- desc->more = kvm_mmu_memory_cache_alloc(cache);
- desc = desc->more;
- desc->spte_count = 0;
- break;
- }
- desc = desc->more;
- }
- count += desc->spte_count;
- desc->sptes[desc->spte_count++] = spte;
- }
- return count;
-}
-
-static void
-pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
- struct pte_list_desc *desc, int i,
- struct pte_list_desc *prev_desc)
-{
- int j = desc->spte_count - 1;
-
- desc->sptes[i] = desc->sptes[j];
- desc->sptes[j] = NULL;
- desc->spte_count--;
- if (desc->spte_count)
- return;
- if (!prev_desc && !desc->more)
- rmap_head->val = 0;
- else
- if (prev_desc)
- prev_desc->more = desc->more;
- else
- rmap_head->val = (unsigned long)desc->more | 1;
- mmu_free_pte_list_desc(desc);
-}
-
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
-{
- struct pte_list_desc *desc;
- struct pte_list_desc *prev_desc;
- int i;
-
- if (!rmap_head->val) {
- pr_err("%s: %p 0->BUG\n", __func__, spte);
- BUG();
- } else if (!(rmap_head->val & 1)) {
- rmap_printk("%p 1->0\n", spte);
- if ((u64 *)rmap_head->val != spte) {
- pr_err("%s: %p 1->BUG\n", __func__, spte);
- BUG();
- }
- rmap_head->val = 0;
- } else {
- rmap_printk("%p many->many\n", spte);
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- prev_desc = NULL;
- while (desc) {
- for (i = 0; i < desc->spte_count; ++i) {
- if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(rmap_head,
- desc, i, prev_desc);
- return;
- }
- }
- prev_desc = desc;
- desc = desc->more;
- }
- pr_err("%s: %p many->many\n", __func__, spte);
- BUG();
- }
-}
-
static void kvm_zap_one_rmap_spte(struct kvm *kvm,
struct kvm_rmap_head *rmap_head, u64 *sptep)
{
@@ -1011,7 +883,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
for (i = 0; i < desc->spte_count; i++)
mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
next = desc->more;
- mmu_free_pte_list_desc(desc);
+ free_pte_list_desc(desc);
}
out:
/* rmap_head is meaningless now, remember to reset it */
@@ -1019,26 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
return true;
}

-unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
-{
- struct pte_list_desc *desc;
- unsigned int count = 0;
-
- if (!rmap_head->val)
- return 0;
- else if (!(rmap_head->val & 1))
- return 1;
-
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-
- while (desc) {
- count += desc->spte_count;
- desc = desc->more;
- }
-
- return count;
-}
-
static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
const struct kvm_memory_slot *slot)
{
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index dbaf6755c5a7..cd1c8f32269d 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -166,7 +166,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
int min_level);
void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
u64 start_gfn, u64 pages);
-unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);

extern int nx_huge_pages;
static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
new file mode 100644
index 000000000000..daa99dee0709
--- /dev/null
+++ b/arch/x86/kvm/mmu/rmap.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "mmu.h"
+#include "mmu_internal.h"
+#include "mmutrace.h"
+#include "rmap.h"
+#include "spte.h"
+
+#include <asm/cmpxchg.h>
+#include <trace/events/kvm.h>
+
+/*
+ * About rmap_head encoding:
+ *
+ * If the bit zero of rmap_head->val is clear, then it points to the only spte
+ * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
+ * pte_list_desc containing more mappings.
+ */
+
+/*
+ * Returns the number of pointers in the rmap chain, not counting the new one.
+ */
+int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
+ struct kvm_rmap_head *rmap_head)
+{
+ struct pte_list_desc *desc;
+ int count = 0;
+
+ if (!rmap_head->val) {
+ rmap_printk("%p %llx 0->1\n", spte, *spte);
+ rmap_head->val = (unsigned long)spte;
+ } else if (!(rmap_head->val & 1)) {
+ rmap_printk("%p %llx 1->many\n", spte, *spte);
+ desc = kvm_mmu_memory_cache_alloc(cache);
+ desc->sptes[0] = (u64 *)rmap_head->val;
+ desc->sptes[1] = spte;
+ desc->spte_count = 2;
+ rmap_head->val = (unsigned long)desc | 1;
+ ++count;
+ } else {
+ rmap_printk("%p %llx many->many\n", spte, *spte);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ while (desc->spte_count == PTE_LIST_EXT) {
+ count += PTE_LIST_EXT;
+ if (!desc->more) {
+ desc->more = kvm_mmu_memory_cache_alloc(cache);
+ desc = desc->more;
+ desc->spte_count = 0;
+ break;
+ }
+ desc = desc->more;
+ }
+ count += desc->spte_count;
+ desc->sptes[desc->spte_count++] = spte;
+ }
+ return count;
+}
+
+void free_pte_list_desc(struct pte_list_desc *pte_list_desc)
+{
+ kmem_cache_free(pte_list_desc_cache, pte_list_desc);
+}
+
+static void
+pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+ struct pte_list_desc *desc, int i,
+ struct pte_list_desc *prev_desc)
+{
+ int j = desc->spte_count - 1;
+
+ desc->sptes[i] = desc->sptes[j];
+ desc->sptes[j] = NULL;
+ desc->spte_count--;
+ if (desc->spte_count)
+ return;
+ if (!prev_desc && !desc->more)
+ rmap_head->val = 0;
+ else
+ if (prev_desc)
+ prev_desc->more = desc->more;
+ else
+ rmap_head->val = (unsigned long)desc->more | 1;
+ free_pte_list_desc(desc);
+}
+
+void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+{
+ struct pte_list_desc *desc;
+ struct pte_list_desc *prev_desc;
+ int i;
+
+ if (!rmap_head->val) {
+ pr_err("%s: %p 0->BUG\n", __func__, spte);
+ BUG();
+ } else if (!(rmap_head->val & 1)) {
+ rmap_printk("%p 1->0\n", spte);
+ if ((u64 *)rmap_head->val != spte) {
+ pr_err("%s: %p 1->BUG\n", __func__, spte);
+ BUG();
+ }
+ rmap_head->val = 0;
+ } else {
+ rmap_printk("%p many->many\n", spte);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ prev_desc = NULL;
+ while (desc) {
+ for (i = 0; i < desc->spte_count; ++i) {
+ if (desc->sptes[i] == spte) {
+ pte_list_desc_remove_entry(rmap_head,
+ desc, i, prev_desc);
+ return;
+ }
+ }
+ prev_desc = desc;
+ desc = desc->more;
+ }
+ pr_err("%s: %p many->many\n", __func__, spte);
+ BUG();
+ }
+}
+
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
+{
+ struct pte_list_desc *desc;
+ unsigned int count = 0;
+
+ if (!rmap_head->val)
+ return 0;
+ else if (!(rmap_head->val & 1))
+ return 1;
+
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+
+ while (desc) {
+ count += desc->spte_count;
+ desc = desc->more;
+ }
+
+ return count;
+}
+
diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
new file mode 100644
index 000000000000..059765b6e066
--- /dev/null
+++ b/arch/x86/kvm/mmu/rmap.h
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __KVM_X86_MMU_RMAP_H
+#define __KVM_X86_MMU_RMAP_H
+
+#include <linux/kvm_host.h>
+
+/* make pte_list_desc fit well in cache lines */
+#define PTE_LIST_EXT 14
+
+/*
+ * Slight optimization of cacheline layout, by putting `more' and `spte_count'
+ * at the start; then accessing it will only use one single cacheline for
+ * either full (entries==PTE_LIST_EXT) case or entries<=6.
+ */
+struct pte_list_desc {
+ struct pte_list_desc *more;
+ /*
+ * Stores number of entries stored in the pte_list_desc. No need to be
+ * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
+ */
+ u64 spte_count;
+ u64 *sptes[PTE_LIST_EXT];
+};
+
+static struct kmem_cache *pte_list_desc_cache;
+
+int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
+ struct kvm_rmap_head *rmap_head);
+void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
+void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
+
+#endif /* __KVM_X86_MMU_RMAP_H */
--
2.39.0.rc0.267.gcb52ba06e7-goog

2022-12-06 18:01:06

by Ben Gardon

Subject: [PATCH 3/7] KVM: x86/MMU: Move gfn_to_rmap() to rmap.c

Move gfn_to_rmap() to rmap.c. While the function is not part of
manipulating the rmap, it is the main way that the MMU gets pointers to
the rmaps.

No functional change intended.

Signed-off-by: Ben Gardon <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 9 ---------
arch/x86/kvm/mmu/rmap.c | 8 ++++++++
arch/x86/kvm/mmu/rmap.h | 2 ++
3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c3a7f443a213..f8d7201210c8 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -891,15 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
return true;
}

-static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
- const struct kvm_memory_slot *slot)
-{
- unsigned long idx;
-
- idx = gfn_to_index(gfn, slot->base_gfn, level);
- return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
-}
-
static bool rmap_can_add(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_memory_cache *mc;
diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
index c3bad366b627..272e89147d96 100644
--- a/arch/x86/kvm/mmu/rmap.c
+++ b/arch/x86/kvm/mmu/rmap.c
@@ -200,3 +200,11 @@ u64 *rmap_get_next(struct rmap_iterator *iter)
return sptep;
}

+struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
+ const struct kvm_memory_slot *slot)
+{
+ unsigned long idx;
+
+ idx = gfn_to_index(gfn, slot->base_gfn, level);
+ return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
+}
diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
index 13b265f3a95e..45732eda57e5 100644
--- a/arch/x86/kvm/mmu/rmap.h
+++ b/arch/x86/kvm/mmu/rmap.h
@@ -49,4 +49,6 @@ u64 *rmap_get_next(struct rmap_iterator *iter);
for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
_spte_; _spte_ = rmap_get_next(_iter_))

+struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
+ const struct kvm_memory_slot *slot);
#endif /* __KVM_X86_MMU_RMAP_H */
--
2.39.0.rc0.267.gcb52ba06e7-goog

2022-12-06 18:01:09

by Ben Gardon

Subject: [PATCH 4/7] KVM: x86/MMU: Move rmap_can_add() and rmap_remove() to rmap.c

Move the functions for checking whether an entry can be added to an rmap and
for removing elements from an rmap into rmap.(c|h).

No functional change intended.

Signed-off-by: Ben Gardon <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 34 +--------------------------------
arch/x86/kvm/mmu/mmu_internal.h | 1 +
arch/x86/kvm/mmu/rmap.c | 32 +++++++++++++++++++++++++++++++
arch/x86/kvm/mmu/rmap.h | 3 +++
4 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f8d7201210c8..52e487d89d54 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -658,7 +658,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)

static bool sp_has_gptes(struct kvm_mmu_page *sp);

-static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
+gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
{
if (sp->role.passthrough)
return sp->gfn;
@@ -891,38 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
return true;
}

-static bool rmap_can_add(struct kvm_vcpu *vcpu)
-{
- struct kvm_mmu_memory_cache *mc;
-
- mc = &vcpu->arch.mmu_pte_list_desc_cache;
- return kvm_mmu_memory_cache_nr_free_objects(mc);
-}
-
-static void rmap_remove(struct kvm *kvm, u64 *spte)
-{
- struct kvm_memslots *slots;
- struct kvm_memory_slot *slot;
- struct kvm_mmu_page *sp;
- gfn_t gfn;
- struct kvm_rmap_head *rmap_head;
-
- sp = sptep_to_sp(spte);
- gfn = kvm_mmu_page_get_gfn(sp, spte_index(spte));
-
- /*
- * Unlike rmap_add, rmap_remove does not run in the context of a vCPU
- * so we have to determine which memslots to use based on context
- * information in sp->role.
- */
- slots = kvm_memslots_for_spte_role(kvm, sp->role);
-
- slot = __gfn_to_memslot(slots, gfn);
- rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
-
- pte_list_remove(spte, rmap_head);
-}
-
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index cd1c8f32269d..3de703c2a5d4 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -318,4 +318,5 @@ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);

+gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
#endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
index 272e89147d96..6833676aa9ea 100644
--- a/arch/x86/kvm/mmu/rmap.c
+++ b/arch/x86/kvm/mmu/rmap.c
@@ -208,3 +208,35 @@ struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
idx = gfn_to_index(gfn, slot->base_gfn, level);
return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
}
+
+bool rmap_can_add(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu_memory_cache *mc;
+
+ mc = &vcpu->arch.mmu_pte_list_desc_cache;
+ return kvm_mmu_memory_cache_nr_free_objects(mc);
+}
+
+void rmap_remove(struct kvm *kvm, u64 *spte)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *slot;
+ struct kvm_mmu_page *sp;
+ gfn_t gfn;
+ struct kvm_rmap_head *rmap_head;
+
+ sp = sptep_to_sp(spte);
+ gfn = kvm_mmu_page_get_gfn(sp, spte_index(spte));
+
+ /*
+ * Unlike rmap_add, rmap_remove does not run in the context of a vCPU
+ * so we have to determine which memslots to use based on context
+ * information in sp->role.
+ */
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+
+ slot = __gfn_to_memslot(slots, gfn);
+ rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
+
+ pte_list_remove(spte, rmap_head);
+}
diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
index 45732eda57e5..81df186ba3c3 100644
--- a/arch/x86/kvm/mmu/rmap.h
+++ b/arch/x86/kvm/mmu/rmap.h
@@ -51,4 +51,7 @@ u64 *rmap_get_next(struct rmap_iterator *iter);

struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
const struct kvm_memory_slot *slot);
+
+bool rmap_can_add(struct kvm_vcpu *vcpu);
+void rmap_remove(struct kvm *kvm, u64 *spte);
#endif /* __KVM_X86_MMU_RMAP_H */
--
2.39.0.rc0.267.gcb52ba06e7-goog

2022-12-06 18:37:24

by Ben Gardon

Subject: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

In continuing to factor the rmap out of mmu.c, move the rmap_iterator
and associated functions and macros into rmap.(c|h).

No functional change intended.

Signed-off-by: Ben Gardon <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 76 -----------------------------------------
arch/x86/kvm/mmu/rmap.c | 61 +++++++++++++++++++++++++++++++++
arch/x86/kvm/mmu/rmap.h | 18 ++++++++++
3 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 90b3735d6064..c3a7f443a213 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -932,82 +932,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
pte_list_remove(spte, rmap_head);
}

-/*
- * Used by the following functions to iterate through the sptes linked by a
- * rmap. All fields are private and not assumed to be used outside.
- */
-struct rmap_iterator {
- /* private fields */
- struct pte_list_desc *desc; /* holds the sptep if not NULL */
- int pos; /* index of the sptep */
-};
-
-/*
- * Iteration must be started by this function. This should also be used after
- * removing/dropping sptes from the rmap link because in such cases the
- * information in the iterator may not be valid.
- *
- * Returns sptep if found, NULL otherwise.
- */
-static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
- struct rmap_iterator *iter)
-{
- u64 *sptep;
-
- if (!rmap_head->val)
- return NULL;
-
- if (!(rmap_head->val & 1)) {
- iter->desc = NULL;
- sptep = (u64 *)rmap_head->val;
- goto out;
- }
-
- iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- iter->pos = 0;
- sptep = iter->desc->sptes[iter->pos];
-out:
- BUG_ON(!is_shadow_present_pte(*sptep));
- return sptep;
-}
-
-/*
- * Must be used with a valid iterator: e.g. after rmap_get_first().
- *
- * Returns sptep if found, NULL otherwise.
- */
-static u64 *rmap_get_next(struct rmap_iterator *iter)
-{
- u64 *sptep;
-
- if (iter->desc) {
- if (iter->pos < PTE_LIST_EXT - 1) {
- ++iter->pos;
- sptep = iter->desc->sptes[iter->pos];
- if (sptep)
- goto out;
- }
-
- iter->desc = iter->desc->more;
-
- if (iter->desc) {
- iter->pos = 0;
- /* desc->sptes[0] cannot be NULL */
- sptep = iter->desc->sptes[iter->pos];
- goto out;
- }
- }
-
- return NULL;
-out:
- BUG_ON(!is_shadow_present_pte(*sptep));
- return sptep;
-}
-
-#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
- for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
- _spte_; _spte_ = rmap_get_next(_iter_))
-
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep);
diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
index daa99dee0709..c3bad366b627 100644
--- a/arch/x86/kvm/mmu/rmap.c
+++ b/arch/x86/kvm/mmu/rmap.c
@@ -139,3 +139,64 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
return count;
}

+/*
+ * Iteration must be started by this function. This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the iterator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter)
+{
+ u64 *sptep;
+
+ if (!rmap_head->val)
+ return NULL;
+
+ if (!(rmap_head->val & 1)) {
+ iter->desc = NULL;
+ sptep = (u64 *)rmap_head->val;
+ goto out;
+ }
+
+ iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ iter->pos = 0;
+ sptep = iter->desc->sptes[iter->pos];
+out:
+ BUG_ON(!is_shadow_present_pte(*sptep));
+ return sptep;
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+ u64 *sptep;
+
+ if (iter->desc) {
+ if (iter->pos < PTE_LIST_EXT - 1) {
+ ++iter->pos;
+ sptep = iter->desc->sptes[iter->pos];
+ if (sptep)
+ goto out;
+ }
+
+ iter->desc = iter->desc->more;
+
+ if (iter->desc) {
+ iter->pos = 0;
+ /* desc->sptes[0] cannot be NULL */
+ sptep = iter->desc->sptes[iter->pos];
+ goto out;
+ }
+ }
+
+ return NULL;
+out:
+ BUG_ON(!is_shadow_present_pte(*sptep));
+ return sptep;
+}
+
diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
index 059765b6e066..13b265f3a95e 100644
--- a/arch/x86/kvm/mmu/rmap.h
+++ b/arch/x86/kvm/mmu/rmap.h
@@ -31,4 +31,22 @@ void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);

+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * rmap. All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+ /* private fields */
+ struct pte_list_desc *desc; /* holds the sptep if not NULL */
+ int pos; /* index of the sptep */
+};
+
+u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
+ struct rmap_iterator *iter);
+u64 *rmap_get_next(struct rmap_iterator *iter);
+
+#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
+ for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
+ _spte_; _spte_ = rmap_get_next(_iter_))
+
#endif /* __KVM_X86_MMU_RMAP_H */
--
2.39.0.rc0.267.gcb52ba06e7-goog

2022-12-06 22:14:47

by kernel test robot

Subject: Re: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

Hi Ben,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kvm/queue]
[also build test WARNING on mst-vhost/linux-next]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20221206173601.549281-2-bgardon%40google.com
patch subject: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c
config: x86_64-allyesconfig
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/22e700d90cb6d8c054c5cacffe0be568004918a8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
git checkout 22e700d90cb6d8c054c5cacffe0be568004918a8
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash arch/x86/kvm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

In file included from arch/x86/kvm/debugfs.c:12:
>> arch/x86/kvm/mmu/rmap.h:26:27: warning: 'pte_list_desc_cache' defined but not used [-Wunused-variable]
26 | static struct kmem_cache *pte_list_desc_cache;
| ^~~~~~~~~~~~~~~~~~~


vim +/pte_list_desc_cache +26 arch/x86/kvm/mmu/rmap.h

25
> 26 static struct kmem_cache *pte_list_desc_cache;
27

--
0-DAY CI Kernel Test Service
https://01.org/lkp



2022-12-07 23:26:41

by Vipin Sharma

Subject: Re: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

On Tue, Dec 6, 2022 at 9:36 AM Ben Gardon <[email protected]> wrote:
>
> In the interest of eventually splitting the Shadow MMU out of mmu.c,
> start by moving some of the operations for manipulating pte_lists out of
> mmu.c and into a new pair of files: rmap.c and rmap.h.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <[email protected]>
> ---
> arch/x86/kvm/Makefile | 2 +-
> arch/x86/kvm/debugfs.c | 1 +
> arch/x86/kvm/mmu/mmu.c | 152 +-------------------------------
> arch/x86/kvm/mmu/mmu_internal.h | 1 -
> arch/x86/kvm/mmu/rmap.c | 141 +++++++++++++++++++++++++++++
> arch/x86/kvm/mmu/rmap.h | 34 +++++++
> 6 files changed, 179 insertions(+), 152 deletions(-)
> create mode 100644 arch/x86/kvm/mmu/rmap.c
> create mode 100644 arch/x86/kvm/mmu/rmap.h
>
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 80e3fe184d17..9f766eebeddf 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,7 +12,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
> kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
> i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
> - mmu/spte.o
> + mmu/spte.o mmu/rmap.o
>
> ifdef CONFIG_HYPERV
> kvm-y += kvm_onhyperv.o
> diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
> index c1390357126a..29f692ecd6f3 100644
> --- a/arch/x86/kvm/debugfs.c
> +++ b/arch/x86/kvm/debugfs.c
> @@ -9,6 +9,7 @@
> #include "lapic.h"
> #include "mmu.h"
> #include "mmu/mmu_internal.h"
> +#include "mmu/rmap.h"
>
> static int vcpu_get_timer_advance_ns(void *data, u64 *val)
> {
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 4736d7849c60..90b3735d6064 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -26,6 +26,7 @@
> #include "kvm_emulate.h"
> #include "cpuid.h"
> #include "spte.h"
> +#include "rmap.h"
>
> #include <linux/kvm_host.h>
> #include <linux/types.h>
> @@ -112,24 +113,6 @@ module_param(dbg, bool, 0644);
>
> #include <trace/events/kvm.h>
>
> -/* make pte_list_desc fit well in cache lines */
> -#define PTE_LIST_EXT 14
> -
> -/*
> - * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> - * at the start; then accessing it will only use one single cacheline for
> - * either full (entries==PTE_LIST_EXT) case or entries<=6.
> - */
> -struct pte_list_desc {
> - struct pte_list_desc *more;
> - /*
> - * Stores number of entries stored in the pte_list_desc. No need to be
> - * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> - */
> - u64 spte_count;
> - u64 *sptes[PTE_LIST_EXT];
> -};
> -
> struct kvm_shadow_walk_iterator {
> u64 addr;
> hpa_t shadow_addr;
> @@ -155,7 +138,6 @@ struct kvm_shadow_walk_iterator {
> ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
> __shadow_walk_next(&(_walker), spte))
>
> -static struct kmem_cache *pte_list_desc_cache;
> struct kmem_cache *mmu_page_header_cache;
> static struct percpu_counter kvm_total_used_mmu_pages;
>
> @@ -674,11 +656,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
> }
>
> -static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> -{
> - kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> -}
> -
> static bool sp_has_gptes(struct kvm_mmu_page *sp);
>
> static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
> @@ -878,111 +855,6 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
> return slot;
> }
>
> -/*
> - * About rmap_head encoding:
> - *
> - * If the bit zero of rmap_head->val is clear, then it points to the only spte
> - * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> - * pte_list_desc containing more mappings.
> - */
> -
> -/*
> - * Returns the number of pointers in the rmap chain, not counting the new one.
> - */
> -static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> - struct kvm_rmap_head *rmap_head)
> -{
> - struct pte_list_desc *desc;
> - int count = 0;
> -
> - if (!rmap_head->val) {
> - rmap_printk("%p %llx 0->1\n", spte, *spte);
> - rmap_head->val = (unsigned long)spte;
> - } else if (!(rmap_head->val & 1)) {
> - rmap_printk("%p %llx 1->many\n", spte, *spte);
> - desc = kvm_mmu_memory_cache_alloc(cache);
> - desc->sptes[0] = (u64 *)rmap_head->val;
> - desc->sptes[1] = spte;
> - desc->spte_count = 2;
> - rmap_head->val = (unsigned long)desc | 1;
> - ++count;
> - } else {
> - rmap_printk("%p %llx many->many\n", spte, *spte);
> - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> - while (desc->spte_count == PTE_LIST_EXT) {
> - count += PTE_LIST_EXT;
> - if (!desc->more) {
> - desc->more = kvm_mmu_memory_cache_alloc(cache);
> - desc = desc->more;
> - desc->spte_count = 0;
> - break;
> - }
> - desc = desc->more;
> - }
> - count += desc->spte_count;
> - desc->sptes[desc->spte_count++] = spte;
> - }
> - return count;
> -}
> -
> -static void
> -pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> - struct pte_list_desc *desc, int i,
> - struct pte_list_desc *prev_desc)
> -{
> - int j = desc->spte_count - 1;
> -
> - desc->sptes[i] = desc->sptes[j];
> - desc->sptes[j] = NULL;
> - desc->spte_count--;
> - if (desc->spte_count)
> - return;
> - if (!prev_desc && !desc->more)
> - rmap_head->val = 0;
> - else
> - if (prev_desc)
> - prev_desc->more = desc->more;
> - else
> - rmap_head->val = (unsigned long)desc->more | 1;
> - mmu_free_pte_list_desc(desc);
> -}
> -
> -static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> -{
> - struct pte_list_desc *desc;
> - struct pte_list_desc *prev_desc;
> - int i;
> -
> - if (!rmap_head->val) {
> - pr_err("%s: %p 0->BUG\n", __func__, spte);
> - BUG();
> - } else if (!(rmap_head->val & 1)) {
> - rmap_printk("%p 1->0\n", spte);
> - if ((u64 *)rmap_head->val != spte) {
> - pr_err("%s: %p 1->BUG\n", __func__, spte);
> - BUG();
> - }
> - rmap_head->val = 0;
> - } else {
> - rmap_printk("%p many->many\n", spte);
> - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> - prev_desc = NULL;
> - while (desc) {
> - for (i = 0; i < desc->spte_count; ++i) {
> - if (desc->sptes[i] == spte) {
> - pte_list_desc_remove_entry(rmap_head,
> - desc, i, prev_desc);
> - return;
> - }
> - }
> - prev_desc = desc;
> - desc = desc->more;
> - }
> - pr_err("%s: %p many->many\n", __func__, spte);
> - BUG();
> - }
> -}
> -
> static void kvm_zap_one_rmap_spte(struct kvm *kvm,
> struct kvm_rmap_head *rmap_head, u64 *sptep)
> {
> @@ -1011,7 +883,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> for (i = 0; i < desc->spte_count; i++)
> mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
> next = desc->more;
> - mmu_free_pte_list_desc(desc);
> + free_pte_list_desc(desc);
> }
> out:
> /* rmap_head is meaningless now, remember to reset it */
> @@ -1019,26 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> return true;
> }
>
> -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> -{
> - struct pte_list_desc *desc;
> - unsigned int count = 0;
> -
> - if (!rmap_head->val)
> - return 0;
> - else if (!(rmap_head->val & 1))
> - return 1;
> -
> - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> -
> - while (desc) {
> - count += desc->spte_count;
> - desc = desc->more;
> - }
> -
> - return count;
> -}
> -
> static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
> const struct kvm_memory_slot *slot)
> {
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index dbaf6755c5a7..cd1c8f32269d 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -166,7 +166,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
> int min_level);
> void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
> u64 start_gfn, u64 pages);
> -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>
> extern int nx_huge_pages;
> static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
> new file mode 100644
> index 000000000000..daa99dee0709
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/rmap.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0
> +

A comment at the top of this file describing what code lives here and what is
expected to go here would be nice.
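Something along these lines, for example (the wording is only a suggestion):

	/*
	 * rmap.c: operations for manipulating the Shadow MMU's reverse
	 * mappings (rmaps) and the underlying pte_list_desc structure,
	 * which links a gfn (or a shadow page's parent_ptes list) to the
	 * sptes that point to it: adding, removing, counting, and
	 * iterating over those mappings.
	 */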

> +#include "mmu.h"
> +#include "mmu_internal.h"
> +#include "mmutrace.h"
> +#include "rmap.h"
> +#include "spte.h"
> +
> +#include <asm/cmpxchg.h>
> +#include <trace/events/kvm.h>
> +
> +/*
> + * About rmap_head encoding:
> + *
> + * If the bit zero of rmap_head->val is clear, then it points to the only spte
> + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> + * pte_list_desc containing more mappings.
> + */
> +
> +/*
> + * Returns the number of pointers in the rmap chain, not counting the new one.
> + */
> +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> + struct kvm_rmap_head *rmap_head)
> +{
> + struct pte_list_desc *desc;
> + int count = 0;
> +
> + if (!rmap_head->val) {
> + rmap_printk("%p %llx 0->1\n", spte, *spte);
> + rmap_head->val = (unsigned long)spte;
> + } else if (!(rmap_head->val & 1)) {
> + rmap_printk("%p %llx 1->many\n", spte, *spte);
> + desc = kvm_mmu_memory_cache_alloc(cache);
> + desc->sptes[0] = (u64 *)rmap_head->val;
> + desc->sptes[1] = spte;
> + desc->spte_count = 2;
> + rmap_head->val = (unsigned long)desc | 1;
> + ++count;
> + } else {
> + rmap_printk("%p %llx many->many\n", spte, *spte);
> + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> + while (desc->spte_count == PTE_LIST_EXT) {
> + count += PTE_LIST_EXT;
> + if (!desc->more) {
> + desc->more = kvm_mmu_memory_cache_alloc(cache);
> + desc = desc->more;
> + desc->spte_count = 0;
> + break;
> + }
> + desc = desc->more;
> + }
> + count += desc->spte_count;
> + desc->sptes[desc->spte_count++] = spte;
> + }
> + return count;
> +}
> +
> +void free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> +{
> + kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> +}
> +
> +static void
> +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> + struct pte_list_desc *desc, int i,
> + struct pte_list_desc *prev_desc)
> +{
> + int j = desc->spte_count - 1;
> +
> + desc->sptes[i] = desc->sptes[j];
> + desc->sptes[j] = NULL;
> + desc->spte_count--;
> + if (desc->spte_count)
> + return;
> + if (!prev_desc && !desc->more)
> + rmap_head->val = 0;
> + else
> + if (prev_desc)
> + prev_desc->more = desc->more;
> + else
> + rmap_head->val = (unsigned long)desc->more | 1;
> + free_pte_list_desc(desc);
> +}
> +
> +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> +{
> + struct pte_list_desc *desc;
> + struct pte_list_desc *prev_desc;
> + int i;
> +
> + if (!rmap_head->val) {
> + pr_err("%s: %p 0->BUG\n", __func__, spte);
> + BUG();
> + } else if (!(rmap_head->val & 1)) {
> + rmap_printk("%p 1->0\n", spte);
> + if ((u64 *)rmap_head->val != spte) {
> + pr_err("%s: %p 1->BUG\n", __func__, spte);
> + BUG();
> + }
> + rmap_head->val = 0;
> + } else {
> + rmap_printk("%p many->many\n", spte);
> + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> + prev_desc = NULL;
> + while (desc) {
> + for (i = 0; i < desc->spte_count; ++i) {
> + if (desc->sptes[i] == spte) {
> + pte_list_desc_remove_entry(rmap_head,
> + desc, i, prev_desc);
> + return;
> + }
> + }
> + prev_desc = desc;
> + desc = desc->more;
> + }
> + pr_err("%s: %p many->many\n", __func__, spte);
> + BUG();
> + }
> +}
> +
> +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> +{
> + struct pte_list_desc *desc;
> + unsigned int count = 0;
> +
> + if (!rmap_head->val)
> + return 0;
> + else if (!(rmap_head->val & 1))
> + return 1;
> +
> + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> +
> + while (desc) {
> + count += desc->spte_count;
> + desc = desc->more;
> + }
> +
> + return count;
> +}
> +
> diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> new file mode 100644
> index 000000000000..059765b6e066
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/rmap.h
> @@ -0,0 +1,34 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#ifndef __KVM_X86_MMU_RMAP_H
> +#define __KVM_X86_MMU_RMAP_H
> +
> +#include <linux/kvm_host.h>
> +
> +/* make pte_list_desc fit well in cache lines */
> +#define PTE_LIST_EXT 14
> +
> +/*
> + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> + * at the start; then accessing it will only use one single cacheline for
> + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> + */
> +struct pte_list_desc {
> + struct pte_list_desc *more;
> + /*
> + * Stores number of entries stored in the pte_list_desc. No need to be
> + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> + */
> + u64 spte_count;
> + u64 *sptes[PTE_LIST_EXT];
> +};
> +
> +static struct kmem_cache *pte_list_desc_cache;

Does it make sense to make it non-static and declare it extern here? Also,
you could provide an init function for the cache which can be called from mmu.c.
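For example, something like this (just a sketch of the idea; the function
names here are made up):

rmap.h:

	extern struct kmem_cache *pte_list_desc_cache;

	int rmap_init_desc_cache(void);
	void rmap_destroy_desc_cache(void);

rmap.c:

	struct kmem_cache *pte_list_desc_cache;

	int rmap_init_desc_cache(void)
	{
		pte_list_desc_cache = kmem_cache_create("pte_list_desc",
					sizeof(struct pte_list_desc),
					0, SLAB_ACCOUNT, NULL);
		return pte_list_desc_cache ? 0 : -ENOMEM;
	}

	void rmap_destroy_desc_cache(void)
	{
		kmem_cache_destroy(pte_list_desc_cache);
	}

and then the existing module init/exit paths in mmu.c would call these
instead of creating and destroying the cache themselves.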


> +
> +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> + struct kvm_rmap_head *rmap_head);
> +void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> +

Similar to the tdp_mmu functions, and to the other rmap functions in the next
patches in the series, should the above functions be prefixed with "rmap_"?


> +#endif /* __KVM_X86_MMU_RMAP_H */
> --
> 2.39.0.rc0.267.gcb52ba06e7-goog
>

2022-12-09 23:15:29

by David Matlack

Subject: Re: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

On Tue, Dec 06, 2022 at 05:35:55PM +0000, Ben Gardon wrote:
> In the interest of eventually splitting the Shadow MMU out of mmu.c,
> start by moving some of the operations for manipulating pte_lists out of
> mmu.c and into a new pair of files: rmap.c and rmap.h.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <[email protected]>
> ---
[...]
> diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> new file mode 100644
> index 000000000000..059765b6e066
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/rmap.h
> @@ -0,0 +1,34 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#ifndef __KVM_X86_MMU_RMAP_H
> +#define __KVM_X86_MMU_RMAP_H
> +
> +#include <linux/kvm_host.h>
> +
> +/* make pte_list_desc fit well in cache lines */
> +#define PTE_LIST_EXT 14
> +
> +/*
> + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> + * at the start; then accessing it will only use one single cacheline for
> + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> + */
> +struct pte_list_desc {
> + struct pte_list_desc *more;
> + /*
> + * Stores number of entries stored in the pte_list_desc. No need to be
> + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> + */
> + u64 spte_count;
> + u64 *sptes[PTE_LIST_EXT];
> +};
> +
> +static struct kmem_cache *pte_list_desc_cache;

The definition of pte_list_desc_cache needs to go in a C file since it's
a global variable. Since it now needs to be accessed by more than one C
file, drop the static. Then it can be accessed with extern.

Since most of the code that sets up and deals with pte_list_desc_cache
is still in mmu.c, my vote is to keep the definition there.

i.e.

mmu.c:

struct kmem_cache *pte_list_desc_cache;

rmap.c

extern struct kmem_cache *pte_list_desc_cache;

And no need for anything in rmap.h.

2022-12-09 23:30:46

by David Matlack

Subject: Re: [PATCH 0/7] KVM: x86/MMU: Factor rmap operations out of mmu.c

On Tue, Dec 06, 2022 at 05:35:54PM +0000, Ben Gardon wrote:
> Move the basic operations for manipulating an rmap out of mmu.c, into
> their own files.

memslot_rmap_alloc() and memslot_rmap_free() from x86.c look like good
candidates to move to rmap.c as well, since they are pure setup/teardown.

>
> This is the first step in a series of series to split the Shadow MMU out of
> mmu.c. Splitting the shadow MMU out results in about a 50% reduction in line
> count of mmu.c, from ~7K to ~3.5K. The rmap operations only represent about
> 10% of the Shadow MMU but are split out first because they are relatively
> self-contained.
>
> This split may also help facilitate the addition of kUnit tests for rmap
> manipulation, especially the fiddly bit flag which differentiates a direct
> pointer from a pte_list_desc.
>
> This effort is complementary to David Matlack's proposal to refactor
> the TDP MMU into an arch-neutral implementation because it further
> clarifies the distinction between the Shadow MMU and TDP MMU within x86.
>
> This series contains no functional changes. It's just a direct movement
> of code from one file to another.
>
> Whether this rmap code should stay in its own file or get merged with
> the rest of the shadow MMU code as it is factored out is open for
> debate.
>
> Ben Gardon (7):
> KVM: x86/MMU: Move pte_list operations to rmap.c
> KVM: x86/MMU: Move rmap_iterator to rmap.h
> KVM: x86/MMU: Move gfn_to_rmap() to rmap.c
> KVM: x86/MMU: Move rmap_can_add() and rmap_remove() to rmap.c
> KVM: x86/MMU: Move the rmap walk iterator out of mmu.c
> KVM: x86/MMU: Move rmap zap operations to rmap.c
> KVM: x86/MMU: Move rmap_add() to rmap.c
>
> arch/x86/kvm/Makefile | 2 +-
> arch/x86/kvm/debugfs.c | 1 +
> arch/x86/kvm/mmu/mmu.c | 438 +-------------------------------
> arch/x86/kvm/mmu/mmu_internal.h | 9 +-
> arch/x86/kvm/mmu/rmap.c | 368 +++++++++++++++++++++++++++
> arch/x86/kvm/mmu/rmap.h | 106 ++++++++
> 6 files changed, 492 insertions(+), 432 deletions(-)
> create mode 100644 arch/x86/kvm/mmu/rmap.c
> create mode 100644 arch/x86/kvm/mmu/rmap.h
>
> --
> 2.39.0.rc0.267.gcb52ba06e7-goog
>

2022-12-09 23:50:21

by David Matlack

Subject: Re: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

On Tue, Dec 06, 2022 at 05:35:56PM +0000, Ben Gardon wrote:
> In continuing to factor the rmap out of mmu.c, move the rmap_iterator
> and associated functions and macros into rmap.(c|h).
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <[email protected]>
> ---
> arch/x86/kvm/mmu/mmu.c | 76 -----------------------------------------
> arch/x86/kvm/mmu/rmap.c | 61 +++++++++++++++++++++++++++++++++
> arch/x86/kvm/mmu/rmap.h | 18 ++++++++++
> 3 files changed, 79 insertions(+), 76 deletions(-)
>
[...]
> diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> index 059765b6e066..13b265f3a95e 100644
> --- a/arch/x86/kvm/mmu/rmap.h
> +++ b/arch/x86/kvm/mmu/rmap.h
> @@ -31,4 +31,22 @@ void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>
> +/*
> + * Used by the following functions to iterate through the sptes linked by a
> + * rmap. All fields are private and not assumed to be used outside.
> + */
> +struct rmap_iterator {
> + /* private fields */
> + struct pte_list_desc *desc; /* holds the sptep if not NULL */
> + int pos; /* index of the sptep */
> +};
> +
> +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> + struct rmap_iterator *iter);
> +u64 *rmap_get_next(struct rmap_iterator *iter);
> +
> +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
> + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
> + _spte_; _spte_ = rmap_get_next(_iter_))
> +

I always found these function names and kvm_rmap_head confusing since
they are about iterating through the pte_list_desc data structure. The
rmap (gfn -> list of sptes) is a specific application of the
pte_list_desc structure, but not the only application. There's also
parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
old list of ptes.

While you are refactoring this code, what do you think about doing the
following renames?

struct kvm_rmap_head -> struct pte_list_head
struct rmap_iterator -> struct pte_list_iterator
rmap_get_first() -> pte_list_get_first()
rmap_get_next() -> pte_list_get_next()
for_each_rmap_spte() -> for_each_pte_list_entry()

Then we can reserve the term "rmap" just for the actual rmap
(slot->arch.rmap), and code that deals with sp->parent_ptes will become
a lot clearer IMO (because it will no longer mention rmap).

e.g. We go from this:

struct rmap_iterator iter;
u64 *sptep;

for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
...
}

To this:

struct pte_list_iterator iter;
u64 *sptep;

for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
...
}

2022-12-10 01:10:22

by David Matlack

Subject: Re: [PATCH 3/7] KVM: x86/MMU: Move gfn_to_rmap() to rmap.c

On Tue, Dec 06, 2022 at 05:35:57PM +0000, Ben Gardon wrote:
> Move gfn_to_rmap() to rmap.c. While the function is not part of
> manipulating the rmap, it is the main way that the MMU gets pointers to
> the rmaps.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <[email protected]>
> ---
[...]
> diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
> index c3bad366b627..272e89147d96 100644
> --- a/arch/x86/kvm/mmu/rmap.c
> +++ b/arch/x86/kvm/mmu/rmap.c
> @@ -200,3 +200,11 @@ u64 *rmap_get_next(struct rmap_iterator *iter)
> return sptep;
> }
>
> +struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
> + const struct kvm_memory_slot *slot)
> +{
> + unsigned long idx;
> +
> + idx = gfn_to_index(gfn, slot->base_gfn, level);
> + return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
> +}

Optional: Since this is such a short function, maybe just make it a
static inline in rmap.h?
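i.e. roughly:

	static inline struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
						const struct kvm_memory_slot *slot)
	{
		unsigned long idx = gfn_to_index(gfn, slot->base_gfn, level);

		return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
	}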

2022-12-14 00:16:57

by Ben Gardon

Subject: Re: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

On Fri, Dec 9, 2022 at 3:04 PM David Matlack <[email protected]> wrote:
>
> On Tue, Dec 06, 2022 at 05:35:56PM +0000, Ben Gardon wrote:
> > In continuing to factor the rmap out of mmu.c, move the rmap_iterator
> > and associated functions and macros into rmap.(c|h).
> >
> > No functional change intended.
> >
> > Signed-off-by: Ben Gardon <[email protected]>
> > ---
> > arch/x86/kvm/mmu/mmu.c | 76 -----------------------------------------
> > arch/x86/kvm/mmu/rmap.c | 61 +++++++++++++++++++++++++++++++++
> > arch/x86/kvm/mmu/rmap.h | 18 ++++++++++
> > 3 files changed, 79 insertions(+), 76 deletions(-)
> >
> [...]
> > diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> > index 059765b6e066..13b265f3a95e 100644
> > --- a/arch/x86/kvm/mmu/rmap.h
> > +++ b/arch/x86/kvm/mmu/rmap.h
> > @@ -31,4 +31,22 @@ void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> > void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> > unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> >
> > +/*
> > + * Used by the following functions to iterate through the sptes linked by a
> > + * rmap. All fields are private and not assumed to be used outside.
> > + */
> > +struct rmap_iterator {
> > + /* private fields */
> > + struct pte_list_desc *desc; /* holds the sptep if not NULL */
> > + int pos; /* index of the sptep */
> > +};
> > +
> > +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> > + struct rmap_iterator *iter);
> > +u64 *rmap_get_next(struct rmap_iterator *iter);
> > +
> > +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
> > + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
> > + _spte_; _spte_ = rmap_get_next(_iter_))
> > +
>
> I always found these function names and kvm_rmap_head confusing since
> they are about iterating through the pte_list_desc data structure. The
> rmap (gfn -> list of sptes) is a specific application of the
> pte_list_desc structure, but not the only application. There's also
> parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
> old list of ptes.
>
> While you are refactoring this code, what do you think about doing the
> following renames?
>
> struct kvm_rmap_head -> struct pte_list_head
> struct rmap_iterator -> struct pte_list_iterator
> rmap_get_first() -> pte_list_get_first()
> rmap_get_next() -> pte_list_get_next()
> for_each_rmap_spte() -> for_each_pte_list_entry()
>
> Then we can reserve the term "rmap" just for the actual rmap
> (slot->arch.rmap), and code that deals with sp->parent_ptes will become
> a lot clearer IMO (because it will no longer mention rmap).
>
> e.g. We go from this:
>
> struct rmap_iterator iter;
> u64 *sptep;
>
> for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
> ...
> }
>
> To this:
>
> struct pte_list_iterator iter;
> u64 *sptep;
>
> for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
> ...
> }

I like this suggestion, and I do think it'll make things more
readable. It's going to be a huge patch to rename all the instances of
kvm_rmap_head, but it's probably worth it.

2022-12-14 00:19:17

by Ben Gardon

Subject: Re: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

On Wed, Dec 7, 2022 at 2:58 PM Vipin Sharma <[email protected]> wrote:
>
> On Tue, Dec 6, 2022 at 9:36 AM Ben Gardon <[email protected]> wrote:
> >
> > In the interest of eventually splitting the Shadow MMU out of mmu.c,
> > start by moving some of the operations for manipulating pte_lists out of
> > mmu.c and into a new pair of files: rmap.c and rmap.h.
> >
> > No functional change intended.
> >
> > Signed-off-by: Ben Gardon <[email protected]>
> > ---
> > arch/x86/kvm/Makefile | 2 +-
> > arch/x86/kvm/debugfs.c | 1 +
> > arch/x86/kvm/mmu/mmu.c | 152 +-------------------------------
> > arch/x86/kvm/mmu/mmu_internal.h | 1 -
> > arch/x86/kvm/mmu/rmap.c | 141 +++++++++++++++++++++++++++++
> > arch/x86/kvm/mmu/rmap.h | 34 +++++++
> > 6 files changed, 179 insertions(+), 152 deletions(-)
> > create mode 100644 arch/x86/kvm/mmu/rmap.c
> > create mode 100644 arch/x86/kvm/mmu/rmap.h
> >
> > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> > index 80e3fe184d17..9f766eebeddf 100644
> > --- a/arch/x86/kvm/Makefile
> > +++ b/arch/x86/kvm/Makefile
> > @@ -12,7 +12,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
> > kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
> > i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> > hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
> > - mmu/spte.o
> > + mmu/spte.o mmu/rmap.o
> >
> > ifdef CONFIG_HYPERV
> > kvm-y += kvm_onhyperv.o
> > diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
> > index c1390357126a..29f692ecd6f3 100644
> > --- a/arch/x86/kvm/debugfs.c
> > +++ b/arch/x86/kvm/debugfs.c
> > @@ -9,6 +9,7 @@
> > #include "lapic.h"
> > #include "mmu.h"
> > #include "mmu/mmu_internal.h"
> > +#include "mmu/rmap.h"
> >
> > static int vcpu_get_timer_advance_ns(void *data, u64 *val)
> > {
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 4736d7849c60..90b3735d6064 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -26,6 +26,7 @@
> > #include "kvm_emulate.h"
> > #include "cpuid.h"
> > #include "spte.h"
> > +#include "rmap.h"
> >
> > #include <linux/kvm_host.h>
> > #include <linux/types.h>
> > @@ -112,24 +113,6 @@ module_param(dbg, bool, 0644);
> >
> > #include <trace/events/kvm.h>
> >
> > -/* make pte_list_desc fit well in cache lines */
> > -#define PTE_LIST_EXT 14
> > -
> > -/*
> > - * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> > - * at the start; then accessing it will only use one single cacheline for
> > - * either full (entries==PTE_LIST_EXT) case or entries<=6.
> > - */
> > -struct pte_list_desc {
> > - struct pte_list_desc *more;
> > - /*
> > - * Stores number of entries stored in the pte_list_desc. No need to be
> > - * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> > - */
> > - u64 spte_count;
> > - u64 *sptes[PTE_LIST_EXT];
> > -};
> > -
> > struct kvm_shadow_walk_iterator {
> > u64 addr;
> > hpa_t shadow_addr;
> > @@ -155,7 +138,6 @@ struct kvm_shadow_walk_iterator {
> > ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
> > __shadow_walk_next(&(_walker), spte))
> >
> > -static struct kmem_cache *pte_list_desc_cache;
> > struct kmem_cache *mmu_page_header_cache;
> > static struct percpu_counter kvm_total_used_mmu_pages;
> >
> > @@ -674,11 +656,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
> > kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
> > }
> >
> > -static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> > -{
> > - kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> > -}
> > -
> > static bool sp_has_gptes(struct kvm_mmu_page *sp);
> >
> > static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
> > @@ -878,111 +855,6 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
> > return slot;
> > }
> >
> > -/*
> > - * About rmap_head encoding:
> > - *
> > - * If the bit zero of rmap_head->val is clear, then it points to the only spte
> > - * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> > - * pte_list_desc containing more mappings.
> > - */
> > -
> > -/*
> > - * Returns the number of pointers in the rmap chain, not counting the new one.
> > - */
> > -static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> > - struct kvm_rmap_head *rmap_head)
> > -{
> > - struct pte_list_desc *desc;
> > - int count = 0;
> > -
> > - if (!rmap_head->val) {
> > - rmap_printk("%p %llx 0->1\n", spte, *spte);
> > - rmap_head->val = (unsigned long)spte;
> > - } else if (!(rmap_head->val & 1)) {
> > - rmap_printk("%p %llx 1->many\n", spte, *spte);
> > - desc = kvm_mmu_memory_cache_alloc(cache);
> > - desc->sptes[0] = (u64 *)rmap_head->val;
> > - desc->sptes[1] = spte;
> > - desc->spte_count = 2;
> > - rmap_head->val = (unsigned long)desc | 1;
> > - ++count;
> > - } else {
> > - rmap_printk("%p %llx many->many\n", spte, *spte);
> > - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > - while (desc->spte_count == PTE_LIST_EXT) {
> > - count += PTE_LIST_EXT;
> > - if (!desc->more) {
> > - desc->more = kvm_mmu_memory_cache_alloc(cache);
> > - desc = desc->more;
> > - desc->spte_count = 0;
> > - break;
> > - }
> > - desc = desc->more;
> > - }
> > - count += desc->spte_count;
> > - desc->sptes[desc->spte_count++] = spte;
> > - }
> > - return count;
> > -}
> > -
> > -static void
> > -pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> > - struct pte_list_desc *desc, int i,
> > - struct pte_list_desc *prev_desc)
> > -{
> > - int j = desc->spte_count - 1;
> > -
> > - desc->sptes[i] = desc->sptes[j];
> > - desc->sptes[j] = NULL;
> > - desc->spte_count--;
> > - if (desc->spte_count)
> > - return;
> > - if (!prev_desc && !desc->more)
> > - rmap_head->val = 0;
> > - else
> > - if (prev_desc)
> > - prev_desc->more = desc->more;
> > - else
> > - rmap_head->val = (unsigned long)desc->more | 1;
> > - mmu_free_pte_list_desc(desc);
> > -}
> > -
> > -static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> > -{
> > - struct pte_list_desc *desc;
> > - struct pte_list_desc *prev_desc;
> > - int i;
> > -
> > - if (!rmap_head->val) {
> > - pr_err("%s: %p 0->BUG\n", __func__, spte);
> > - BUG();
> > - } else if (!(rmap_head->val & 1)) {
> > - rmap_printk("%p 1->0\n", spte);
> > - if ((u64 *)rmap_head->val != spte) {
> > - pr_err("%s: %p 1->BUG\n", __func__, spte);
> > - BUG();
> > - }
> > - rmap_head->val = 0;
> > - } else {
> > - rmap_printk("%p many->many\n", spte);
> > - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > - prev_desc = NULL;
> > - while (desc) {
> > - for (i = 0; i < desc->spte_count; ++i) {
> > - if (desc->sptes[i] == spte) {
> > - pte_list_desc_remove_entry(rmap_head,
> > - desc, i, prev_desc);
> > - return;
> > - }
> > - }
> > - prev_desc = desc;
> > - desc = desc->more;
> > - }
> > - pr_err("%s: %p many->many\n", __func__, spte);
> > - BUG();
> > - }
> > -}
> > -
> > static void kvm_zap_one_rmap_spte(struct kvm *kvm,
> > struct kvm_rmap_head *rmap_head, u64 *sptep)
> > {
> > @@ -1011,7 +883,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> > for (i = 0; i < desc->spte_count; i++)
> > mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
> > next = desc->more;
> > - mmu_free_pte_list_desc(desc);
> > + free_pte_list_desc(desc);
> > }
> > out:
> > /* rmap_head is meaningless now, remember to reset it */
> > @@ -1019,26 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> > return true;
> > }
> >
> > -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> > -{
> > - struct pte_list_desc *desc;
> > - unsigned int count = 0;
> > -
> > - if (!rmap_head->val)
> > - return 0;
> > - else if (!(rmap_head->val & 1))
> > - return 1;
> > -
> > - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > -
> > - while (desc) {
> > - count += desc->spte_count;
> > - desc = desc->more;
> > - }
> > -
> > - return count;
> > -}
> > -
> > static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
> > const struct kvm_memory_slot *slot)
> > {
> > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > index dbaf6755c5a7..cd1c8f32269d 100644
> > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > @@ -166,7 +166,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
> > int min_level);
> > void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
> > u64 start_gfn, u64 pages);
> > -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> >
> > extern int nx_huge_pages;
> > static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> > diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
> > new file mode 100644
> > index 000000000000..daa99dee0709
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/rmap.c
> > @@ -0,0 +1,141 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
>
> A comment would be nice describing the expectations for this file and what
> code lives here.

I'll add one.
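
Probably something along these lines (just a rough sketch of the wording,
to be refined once the final contents of the file settle):

        /*
         * rmap.c: Operations for manipulating the reverse maps (rmaps) which
         * link a gfn back to the sptes mapping it, including the underlying
         * pte_list_desc chains used to store those lists of sptes.
         */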

>
> > +#include "mmu.h"
> > +#include "mmu_internal.h"
> > +#include "mmutrace.h"
> > +#include "rmap.h"
> > +#include "spte.h"
> > +
> > +#include <asm/cmpxchg.h>
> > +#include <trace/events/kvm.h>
> > +
> > +/*
> > + * About rmap_head encoding:
> > + *
> > + * If the bit zero of rmap_head->val is clear, then it points to the only spte
> > + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> > + * pte_list_desc containing more mappings.
> > + */
> > +
> > +/*
> > + * Returns the number of pointers in the rmap chain, not counting the new one.
> > + */
> > +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> > + struct kvm_rmap_head *rmap_head)
> > +{
> > + struct pte_list_desc *desc;
> > + int count = 0;
> > +
> > + if (!rmap_head->val) {
> > + rmap_printk("%p %llx 0->1\n", spte, *spte);
> > + rmap_head->val = (unsigned long)spte;
> > + } else if (!(rmap_head->val & 1)) {
> > + rmap_printk("%p %llx 1->many\n", spte, *spte);
> > + desc = kvm_mmu_memory_cache_alloc(cache);
> > + desc->sptes[0] = (u64 *)rmap_head->val;
> > + desc->sptes[1] = spte;
> > + desc->spte_count = 2;
> > + rmap_head->val = (unsigned long)desc | 1;
> > + ++count;
> > + } else {
> > + rmap_printk("%p %llx many->many\n", spte, *spte);
> > + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > + while (desc->spte_count == PTE_LIST_EXT) {
> > + count += PTE_LIST_EXT;
> > + if (!desc->more) {
> > + desc->more = kvm_mmu_memory_cache_alloc(cache);
> > + desc = desc->more;
> > + desc->spte_count = 0;
> > + break;
> > + }
> > + desc = desc->more;
> > + }
> > + count += desc->spte_count;
> > + desc->sptes[desc->spte_count++] = spte;
> > + }
> > + return count;
> > +}
> > +
> > +void free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> > +{
> > + kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> > +}
> > +
> > +static void
> > +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> > + struct pte_list_desc *desc, int i,
> > + struct pte_list_desc *prev_desc)
> > +{
> > + int j = desc->spte_count - 1;
> > +
> > + desc->sptes[i] = desc->sptes[j];
> > + desc->sptes[j] = NULL;
> > + desc->spte_count--;
> > + if (desc->spte_count)
> > + return;
> > + if (!prev_desc && !desc->more)
> > + rmap_head->val = 0;
> > + else
> > + if (prev_desc)
> > + prev_desc->more = desc->more;
> > + else
> > + rmap_head->val = (unsigned long)desc->more | 1;
> > + free_pte_list_desc(desc);
> > +}
> > +
> > +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> > +{
> > + struct pte_list_desc *desc;
> > + struct pte_list_desc *prev_desc;
> > + int i;
> > +
> > + if (!rmap_head->val) {
> > + pr_err("%s: %p 0->BUG\n", __func__, spte);
> > + BUG();
> > + } else if (!(rmap_head->val & 1)) {
> > + rmap_printk("%p 1->0\n", spte);
> > + if ((u64 *)rmap_head->val != spte) {
> > + pr_err("%s: %p 1->BUG\n", __func__, spte);
> > + BUG();
> > + }
> > + rmap_head->val = 0;
> > + } else {
> > + rmap_printk("%p many->many\n", spte);
> > + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > + prev_desc = NULL;
> > + while (desc) {
> > + for (i = 0; i < desc->spte_count; ++i) {
> > + if (desc->sptes[i] == spte) {
> > + pte_list_desc_remove_entry(rmap_head,
> > + desc, i, prev_desc);
> > + return;
> > + }
> > + }
> > + prev_desc = desc;
> > + desc = desc->more;
> > + }
> > + pr_err("%s: %p many->many\n", __func__, spte);
> > + BUG();
> > + }
> > +}
> > +
> > +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> > +{
> > + struct pte_list_desc *desc;
> > + unsigned int count = 0;
> > +
> > + if (!rmap_head->val)
> > + return 0;
> > + else if (!(rmap_head->val & 1))
> > + return 1;
> > +
> > + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > +
> > + while (desc) {
> > + count += desc->spte_count;
> > + desc = desc->more;
> > + }
> > +
> > + return count;
> > +}
> > +
> > diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> > new file mode 100644
> > index 000000000000..059765b6e066
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/rmap.h
> > @@ -0,0 +1,34 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#ifndef __KVM_X86_MMU_RMAP_H
> > +#define __KVM_X86_MMU_RMAP_H
> > +
> > +#include <linux/kvm_host.h>
> > +
> > +/* make pte_list_desc fit well in cache lines */
> > +#define PTE_LIST_EXT 14
> > +
> > +/*
> > + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> > + * at the start; then accessing it will only use one single cacheline for
> > + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> > + */
> > +struct pte_list_desc {
> > + struct pte_list_desc *more;
> > + /*
> > + * Stores number of entries stored in the pte_list_desc. No need to be
> > + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> > + */
> > + u64 spte_count;
> > + u64 *sptes[PTE_LIST_EXT];
> > +};
> > +
> > +static struct kmem_cache *pte_list_desc_cache;
>
> Does it make sense to make it non-static and extern here? Also, you
> could provide an init function which can be called from mmu.c.

Going to follow David's suggestion and leave it in mmu.c for now.
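
Roughly speaking (a sketch only, assuming the cache setup code also stays
in mmu.c), that works out to:

        /* mmu.c keeps the definition, just no longer static: */
        struct kmem_cache *pte_list_desc_cache;

        /* rmap.c only references the cache defined in mmu.c: */
        extern struct kmem_cache *pte_list_desc_cache;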

>
>
> > +
> > +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> > + struct kvm_rmap_head *rmap_head);
> > +void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> > +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> > +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> > +
>
> Similar to tdp_mmu, and to the other rmap functions in later patches in
> the series, should the above functions be prefixed with "rmap_"?

I think I'm going to abandon the idea of having a separate file for
rmap stuff and just have one larger shadow MMU file with a variety of
names. I'll clean up the naming at the end of the series once
everything is moved over and the set of things being exported from the
shadow_mmu.c file has stabilized.
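
In Makefile terms that would presumably mean building one mmu/shadow_mmu.o
instead of mmu/rmap.o, i.e. roughly (sketch only, the file name is still
subject to change):

        kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
                   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
                   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
                   mmu/spte.o mmu/shadow_mmu.o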

>
>
> > +#endif /* __KVM_X86_MMU_RMAP_H */
> > --
> > 2.39.0.rc0.267.gcb52ba06e7-goog
> >

2022-12-14 00:21:13

by Ben Gardon

[permalink] [raw]
Subject: Re: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

On Fri, Dec 9, 2022 at 2:22 PM David Matlack <[email protected]> wrote:
>
> On Tue, Dec 06, 2022 at 05:35:55PM +0000, Ben Gardon wrote:
> > In the interest of eventually splitting the Shadow MMU out of mmu.c,
> > start by moving some of the operations for manipulating pte_lists out of
> > mmu.c and into a new pair of files: rmap.c and rmap.h.
> >
> > No functional change intended.
> >
> > Signed-off-by: Ben Gardon <[email protected]>
> > ---
> [...]
> > diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> > new file mode 100644
> > index 000000000000..059765b6e066
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/rmap.h
> > @@ -0,0 +1,34 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#ifndef __KVM_X86_MMU_RMAP_H
> > +#define __KVM_X86_MMU_RMAP_H
> > +
> > +#include <linux/kvm_host.h>
> > +
> > +/* make pte_list_desc fit well in cache lines */
> > +#define PTE_LIST_EXT 14
> > +
> > +/*
> > + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> > + * at the start; then accessing it will only use one single cacheline for
> > + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> > + */
> > +struct pte_list_desc {
> > + struct pte_list_desc *more;
> > + /*
> > + * Stores number of entries stored in the pte_list_desc. No need to be
> > + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> > + */
> > + u64 spte_count;
> > + u64 *sptes[PTE_LIST_EXT];
> > +};
> > +
> > +static struct kmem_cache *pte_list_desc_cache;
>
> The definition of pte_list_desc_cache needs to go in a C file since it's
> a global variable. Since it now needs to be accessed by more than one C
> file, drop the static. Then it can be accessed with extern.
>
> Since most of the code that sets up and deals with pte_list_desc_cache
> is still in mmu.c, my vote is to keep the definition there.
>
> i.e.
>
> mmu.c:
>
> struct kmem_cache *pte_list_desc_cache;
>
> rmap.c
>
> extern struct kmem_cache *pte_list_desc_cache;
>
> And no need for anything in rmap.h.

Right, good point. I'll fix that in the next edition.

2022-12-14 01:23:07

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

On Tue, Dec 13, 2022, Ben Gardon wrote:
> On Fri, Dec 9, 2022 at 3:04 PM David Matlack <[email protected]> wrote:
> >
> > > +/*
> > > + * Used by the following functions to iterate through the sptes linked by a
> > > + * rmap. All fields are private and not assumed to be used outside.
> > > + */
> > > +struct rmap_iterator {
> > > + /* private fields */
> > > + struct pte_list_desc *desc; /* holds the sptep if not NULL */
> > > + int pos; /* index of the sptep */
> > > +};
> > > +
> > > +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> > > + struct rmap_iterator *iter);
> > > +u64 *rmap_get_next(struct rmap_iterator *iter);
> > > +
> > > +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
> > > + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
> > > + _spte_; _spte_ = rmap_get_next(_iter_))
> > > +
> >
> > I always found these function names and kvm_rmap_head confusing since

Heh, you definitely aren't the only one.

> > they are about iterating through the pte_list_desc data structure. The
> > rmap (gfn -> list of sptes) is a specific application of the
> > pte_list_desc structure, but not the only application. There's also
> > parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
> > old list of ptes.
>
> > While you are refactoring this code, what do you think about doing the
> > following renames?
> >
> > struct kvm_rmap_head -> struct pte_list_head
> > struct rmap_iterator -> struct pte_list_iterator
> > rmap_get_first() -> pte_list_get_first()
> > rmap_get_next() -> pte_list_get_next()
> > for_each_rmap_spte() -> for_each_pte_list_entry()

I would strongly prefer to keep "spte" in this one regardless of what other naming
changes we do (see below). Maybe just for_each_spte()? IMO, "pte_list_entry"
unnecessarily obfuscates that it's a list of SPTEs.

> > Then we can reserve the term "rmap" just for the actual rmap
> > (slot->arch.rmap), and code that deals with sp->parent_ptes will become
> > a lot more clear IMO (because it will no longer mention rmap).
> >
> > e.g. We go from this:
> >
> > struct rmap_iterator iter;
> > u64 *sptep;
> >
> > for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
> > ...
> > }
> >
> > To this:
> >
> > struct pte_list_iterator iter;
> > u64 *sptep;
> >
> > for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
> > ...
> > }
>
> I like this suggestion, and I do think it'll make things more
> readable. It's going to be a huge patch to rename all the instances of
> kvm_rmap_head, but it's probably worth it.

I generally like this idea too, but tying into my above comment, before jumping
in I think we should figure out what end state we want, i.e. get the bikeshedding
out of the way now to hopefully avoid dragging out a series while various things
get nitpicked.

E.g. if we just rename the structs and their macros, then we'll end up with
things like

static bool slot_rmap_write_protect(struct kvm *kvm,
struct pte_list_head *rmap_head,
const struct kvm_memory_slot *slot)
{
return rmap_write_protect(rmap_head, false);
}

which isn't terrible, but there's still opportunity for cleanup, e.g.
rmap_write_protect() could easily be sptes_write_protect() or write_protect_sptes().

That will generate a naming conflict of sorts with pte_list_head if we don't also
rename that to spte_list_head. And I think capturing that it's a list of SPTEs and
not guest PTEs will be helpful in general.

And if we rename pte_list_head, then we might as well commit 100% and use consistent
nomenclature across the board, e.g. end up with

static bool sptes_clear_dirty(struct kvm *kvm, struct sptes_list_head *head,
const struct kvm_memory_slot *slot)
{
u64 *sptep;
struct spte_list_iterator iter;
bool flush = false;

for_each_spte(head, &iter, sptep) {
if (spte_ad_need_write_protect(*sptep))
flush |= spte_wrprot_for_clear_dirty(sptep);
else
flush |= spte_clear_dirty(sptep);
}

return flush;
}

versus the current

static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
const struct kvm_memory_slot *slot)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;

for_each_rmap_spte(rmap_head, &iter, sptep)
if (spte_ad_need_write_protect(*sptep))
flush |= spte_wrprot_for_clear_dirty(sptep);
else
flush |= spte_clear_dirty(sptep);

return flush;
}

2022-12-14 18:05:47

by Ben Gardon

[permalink] [raw]
Subject: Re: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

On Tue, Dec 13, 2022 at 4:59 PM Sean Christopherson <[email protected]> wrote:
>
> On Tue, Dec 13, 2022, Ben Gardon wrote:
> > On Fri, Dec 9, 2022 at 3:04 PM David Matlack <[email protected]> wrote:
> > >
> > > > +/*
> > > > + * Used by the following functions to iterate through the sptes linked by a
> > > > + * rmap. All fields are private and not assumed to be used outside.
> > > > + */
> > > > +struct rmap_iterator {
> > > > + /* private fields */
> > > > + struct pte_list_desc *desc; /* holds the sptep if not NULL */
> > > > + int pos; /* index of the sptep */
> > > > +};
> > > > +
> > > > +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> > > > + struct rmap_iterator *iter);
> > > > +u64 *rmap_get_next(struct rmap_iterator *iter);
> > > > +
> > > > +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
> > > > + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
> > > > + _spte_; _spte_ = rmap_get_next(_iter_))
> > > > +
> > >
> > > I always found these function names and kvm_rmap_head confusing since
>
> Heh, you definitely aren't the only one.
>
> > > they are about iterating through the pte_list_desc data structure. The
> > > rmap (gfn -> list of sptes) is a specific application of the
> > > pte_list_desc structure, but not the only application. There's also
> > > parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
> > > old list of ptes.
> >
> > > While you are refactoring this code, what do you think about doing the
> > > following renames?
> > >
> > > struct kvm_rmap_head -> struct pte_list_head
> > > struct rmap_iterator -> struct pte_list_iterator
> > > rmap_get_first() -> pte_list_get_first()
> > > rmap_get_next() -> pte_list_get_next()
> > > for_each_rmap_spte() -> for_each_pte_list_entry()
>
> I would strongly prefer to keep "spte" in this one regardless of what other naming
> changes we do (see below). Maybe just for_each_spte()? IMO, "pte_list_entry"
> unnecessarily obfuscates that it's a list of SPTEs.
>
> > > Then we can reserve the term "rmap" just for the actual rmap
> > > (slot->arch.rmap), and code that deals with sp->parent_ptes will become
> > > a lot more clear IMO (because it will no longer mention rmap).
> > >
> > > e.g. We go from this:
> > >
> > > struct rmap_iterator iter;
> > > u64 *sptep;
> > >
> > > for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
> > > ...
> > > }
> > >
> > > To this:
> > >
> > > struct pte_list_iterator iter;
> > > u64 *sptep;
> > >
> > > for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
> > > ...
> > > }
> >
> > I like this suggestion, and I do think it'll make things more
> > readable. It's going to be a huge patch to rename all the instances of
> > kvm_rmap_head, but it's probably worth it.
>
> I generally like this idea too, but tying into my above comment, before jumping
> in I think we should figure out what end state we want, i.e. get the bikeshedding
> out of the way now to hopefully avoid dragging out a series while various things
> get nitpicked.
>
> E.g. if we just rename the structs and their macros, then we'll end up with
> things like
>
> static bool slot_rmap_write_protect(struct kvm *kvm,
> struct pte_list_head *rmap_head,
> const struct kvm_memory_slot *slot)
> {
> return rmap_write_protect(rmap_head, false);
> }
>
> which isn't terrible, but there's still opportunity for cleanup, e.g.
> rmap_write_protect() could easily be sptes_write_protect() or write_protect_sptes().
>
> That will generate a naming conflict of sorts with pte_list_head if we don't also
> rename that to spte_list_head. And I think capturing that it's a list of SPTEs and
> not guest PTEs will be helpful in general.
>
> And if we rename pte_list_head, then we might as well commit 100% and use consistent
> nomenclature across the board, e.g. end up with
>
> static bool sptes_clear_dirty(struct kvm *kvm, struct sptes_list_head *head,
> const struct kvm_memory_slot *slot)
> {
> u64 *sptep;
> struct spte_list_iterator iter;
> bool flush = false;
>
> for_each_spte(head, &iter, sptep) {
> if (spte_ad_need_write_protect(*sptep))
> flush |= spte_wrprot_for_clear_dirty(sptep);
> else
> flush |= spte_clear_dirty(sptep);
> }
>
> return flush;
> }
>
> versus the current
>
> static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
> const struct kvm_memory_slot *slot)
> {
> u64 *sptep;
> struct rmap_iterator iter;
> bool flush = false;
>
> for_each_rmap_spte(rmap_head, &iter, sptep)
> if (spte_ad_need_write_protect(*sptep))
> flush |= spte_wrprot_for_clear_dirty(sptep);
> else
> flush |= spte_clear_dirty(sptep);
>
> return flush;
> }

I'd be happy to see some consistent SPTE-based naming in the Shadow
MMU and more or less get rid of the rmap naming scheme. Once you
change to spte_list_head or whatever, the use of the actual rmap (an
array of spte_list_heads) becomes super narrow.

Given the potential for enormous scope creep on what's already going
to be a long series, I'm inclined to split this work into two parts:
1. Move code from mmu.c to shadow_mmu.c with minimal cleanups /
refactors / renames; just move the code
2. Clean up naming conventions: make the functions exported in
shadow_mmu.h consistent, get rid of the whole rmap naming scheme, etc.

That way git-blame will preserve context around the renames /
refactors which would be obfuscated if we did 2 before 1, and we can
reduce merge conflicts.

2022-12-15 01:08:20

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

On Wed, Dec 14, 2022, Ben Gardon wrote:
> On Tue, Dec 13, 2022 at 4:59 PM Sean Christopherson <[email protected]> wrote:
> > And if we rename pte_list_head, then we might as well commit 100% and use consisnent
> > nomenclature across the board, e.g. end up with

...

> I'd be happy to see some consistent SPTE-based naming in the Shadow
> MMU and more or less get rid of the rmap naming scheme. Once you
> change to spte_list_head or whatever, the use of the actual rmap (an
> array of spte_list_heads) becomes super narrow.

Yeah. And at least for me, the more literal "walk a list of SPTEs" is much
easier to wrap my head around than "walk rmaps".

> Given the potential for enormous scope creep on what's already going
> to be a long series, I'm inclined to split this work into two parts:
> 1. Move code from mmu.c to shadow_mmu.c with minimal cleanups /
> refactors / renames; just move the code
> 2. Clean up naming conventions: make the functions exported in
> shadow_mmu.h consistent, get rid of the whole rmap naming scheme, etc.
>
> That way git-blame will preserve context around the renames /
> refactors which would be obfuscated if we did 2 before 1,

+1

> and we can reduce merge conflicts.

That might be wishful thinking ;-)

One thought for the rename would be to gather all the reviews and feedback, and
then wait to send the final version until shortly before the merge window, i.e.
wait for everything else to land so that only future development gets affected.
That would give Paolo and me a bit of extra motivation to get the x86 queue
solidified sooner rather than later too...

2022-12-28 08:18:54

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c

Hi Ben,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/queue]
[also build test ERROR on mst-vhost/linux-next linus/master v6.2-rc1 next-20221226]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patches, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20221206173601.549281-2-bgardon%40google.com
patch subject: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c
config: i386-randconfig-a004-20220606
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/22e700d90cb6d8c054c5cacffe0be568004918a8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
git checkout 22e700d90cb6d8c054c5cacffe0be568004918a8
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 olddefconfig
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from arch/x86/kvm/debugfs.c:12:
>> arch/x86/kvm/mmu/rmap.h:26:27: error: 'pte_list_desc_cache' defined but not used [-Werror=unused-variable]
26 | static struct kmem_cache *pte_list_desc_cache;
| ^~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors


vim +/pte_list_desc_cache +26 arch/x86/kvm/mmu/rmap.h

25
> 26 static struct kmem_cache *pte_list_desc_cache;
27

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (2.09 kB)
config (120.70 kB)