I added further optimization based on Avi's advice and my rmap_pde work.
- patch [5-6]
The new test results were impressively good (see below): THP page
invalidation became more than 5 times faster on my x86 machine.
I also noticed that the GUI's responsiveness on the host improved a lot
while the guest's memory was being swapped.
Before:
...
19.852 us | __mmu_notifier_invalidate_range_start();
28.033 us | __mmu_notifier_invalidate_range_start();
19.066 us | __mmu_notifier_invalidate_range_start();
44.715 us | __mmu_notifier_invalidate_range_start();
31.613 us | __mmu_notifier_invalidate_range_start();
20.659 us | __mmu_notifier_invalidate_range_start();
19.979 us | __mmu_notifier_invalidate_range_start();
20.416 us | __mmu_notifier_invalidate_range_start();
20.632 us | __mmu_notifier_invalidate_range_start();
22.316 us | __mmu_notifier_invalidate_range_start();
...
After:
...
4.089 us | __mmu_notifier_invalidate_range_start();
4.096 us | __mmu_notifier_invalidate_range_start();
3.560 us | __mmu_notifier_invalidate_range_start();
3.376 us | __mmu_notifier_invalidate_range_start();
3.772 us | __mmu_notifier_invalidate_range_start();
3.353 us | __mmu_notifier_invalidate_range_start();
3.332 us | __mmu_notifier_invalidate_range_start();
3.332 us | __mmu_notifier_invalidate_range_start();
3.332 us | __mmu_notifier_invalidate_range_start();
3.337 us | __mmu_notifier_invalidate_range_start();
...
Takuya Yoshikawa (6):
KVM: MMU: Use __gfn_to_rmap() to clean up kvm_handle_hva()
KVM: Introduce hva_to_gfn_memslot() for kvm_handle_hva()
KVM: MMU: Make kvm_handle_hva() handle range of addresses
KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start()
KVM: Separate rmap_pde from kvm_lpage_info->write_count
KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()
arch/powerpc/include/asm/kvm_host.h | 2 +
arch/powerpc/kvm/book3s_64_mmu_hv.c | 42 +++++++++++++++-----
arch/x86/include/asm/kvm_host.h | 3 +-
arch/x86/kvm/mmu.c | 72 +++++++++++++++++++++++------------
arch/x86/kvm/x86.c | 13 ++++++-
include/linux/kvm_host.h | 8 ++++
virt/kvm/kvm_main.c | 3 +-
7 files changed, 104 insertions(+), 39 deletions(-)
--
1.7.5.4
By using __gfn_to_rmap(), we can treat every level uniformly in kvm_handle_hva().
Signed-off-by: Takuya Yoshikawa <[email protected]>
---
arch/x86/kvm/mmu.c | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3b53d9e..d3e7e6a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1205,14 +1205,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
gfn_t gfn = memslot->base_gfn + gfn_offset;
- ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+ ret = 0;
- for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
- struct kvm_lpage_info *linfo;
+ for (j = PT_PAGE_TABLE_LEVEL;
+ j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+ unsigned long *rmapp;
- linfo = lpage_info_slot(gfn, memslot,
- PT_DIRECTORY_LEVEL + j);
- ret |= handler(kvm, &linfo->rmap_pde, data);
+ rmapp = __gfn_to_rmap(gfn, j, memslot);
+ ret |= handler(kvm, rmapp, data);
}
trace_kvm_age_page(hva, memslot, ret);
retval |= ret;
--
1.7.5.4
This restricts hva handling in the MMU code and makes it easier to extend
kvm_handle_hva() so that it can handle a range of addresses later in this
patch series.
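For illustration only, here is a tiny standalone model of the conversion the
new helper performs; struct memslot and the example values are simplified
stand-ins rather than the kernel definitions, and the gfn_to_hva_memslot()
body is written out from its usual definition so the round trip can be checked:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
typedef unsigned long long gfn_t;

struct memslot {                /* simplified stand-in for kvm_memory_slot */
        gfn_t base_gfn;
        unsigned long userspace_addr;
};

/* same formula as the new hva_to_gfn_memslot() */
static gfn_t hva_to_gfn_memslot(unsigned long hva, struct memslot *slot)
{
        return slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
}

/* counterpart of the existing gfn_to_hva_memslot() */
static unsigned long gfn_to_hva_memslot(struct memslot *slot, gfn_t gfn)
{
        return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
}

int main(void)
{
        struct memslot slot = { .base_gfn = 0x1000,
                                .userspace_addr = 0x10000000UL };
        gfn_t gfn;

        /* the two helpers are inverses for gfns inside the slot */
        for (gfn = slot.base_gfn; gfn < slot.base_gfn + 8; gfn++)
                assert(hva_to_gfn_memslot(gfn_to_hva_memslot(&slot, gfn),
                                          &slot) == gfn);
        printf("round-trip OK\n");
        return 0;
}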
Signed-off-by: Takuya Yoshikawa <[email protected]>
Cc: Alexander Graf <[email protected]>
Cc: Paul Mackerras <[email protected]>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +++---
arch/x86/kvm/mmu.c | 3 +--
include/linux/kvm_host.h | 8 ++++++++
3 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d03eb6f..3703755 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -772,10 +772,10 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
end = start + (memslot->npages << PAGE_SHIFT);
if (hva >= start && hva < end) {
- gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+ gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+ gfn_t gfn_offset = gfn - memslot->base_gfn;
- ret = handler(kvm, &memslot->rmap[gfn_offset],
- memslot->base_gfn + gfn_offset);
+ ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
retval |= ret;
}
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d3e7e6a..b898bec 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1202,8 +1202,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
end = start + (memslot->npages << PAGE_SHIFT);
if (hva >= start && hva < end) {
- gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- gfn_t gfn = memslot->base_gfn + gfn_offset;
+ gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
ret = 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 27ac8a4..2e25d17 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -740,6 +740,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}
+static inline gfn_t
+hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
+{
+ gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
+
+ return slot->base_gfn + gfn_offset;
+}
+
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
--
1.7.5.4
When the guest's memory is backed by THP pages, the MMU notifier needs to call
kvm_unmap_hva(), which in turn leads to kvm_handle_hva(), in a loop to
invalidate a range of pages which together constitute one huge page:
for each page
for each memslot
if page is in memslot
unmap using rmap
This means that, although every page in that range is expected to be found in
the same memslot, we are forced to check unrelated memslots many times.
The more memslots the guest has, the worse the situation becomes.
Furthermore, if the range does not include any pages in the guest's
memory, the loop over the pages just consumes extra time for nothing.
This patch, together with the following patches, solves this problem by
introducing kvm_handle_hva_range(), which makes the loop look like this:
for each memslot
for each page in memslot
unmap using rmap
In this new processing, the actual work becomes a loop over the rmap array,
which is much more cache friendly than before.
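In outline, the new loop clamps [start, end) against each memslot's hva range
and then walks the resulting gfn range once per slot. The following is only a
rough standalone sketch of that structure; the types, the handler, and the
example slot values are simplified stand-ins, and the real code is in the diff
below:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
typedef unsigned long long gfn_t;

struct memslot {                        /* simplified stand-in */
        gfn_t base_gfn;
        unsigned long userspace_addr;
        unsigned long npages;
};

static gfn_t hva_to_gfn_memslot(unsigned long hva, struct memslot *slot)
{
        return slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
}

/* stand-in for the rmap handler: just report which gfn would be unmapped */
static int handle_gfn(struct memslot *slot, gfn_t gfn)
{
        printf("slot base_gfn=0x%llx: handle gfn 0x%llx\n",
               slot->base_gfn, gfn);
        return 0;
}

static void handle_hva_range(struct memslot *slots, int nslots,
                             unsigned long start, unsigned long end)
{
        int i;

        for (i = 0; i < nslots; i++) {          /* for each memslot */
                struct memslot *slot = &slots[i];
                unsigned long hva_start, hva_end;
                gfn_t gfn;

                /* clamp [start, end) to this slot's hva range */
                hva_start = start > slot->userspace_addr ?
                            start : slot->userspace_addr;
                hva_end = slot->userspace_addr + (slot->npages << PAGE_SHIFT);
                if (end < hva_end)
                        hva_end = end;
                if (hva_start >= hva_end)       /* range misses this slot */
                        continue;

                /* for each page in the overlapping part of the memslot */
                for (gfn = hva_to_gfn_memslot(hva_start, slot);
                     gfn < hva_to_gfn_memslot(hva_end, slot); gfn++)
                        handle_gfn(slot, gfn);
        }
}

int main(void)
{
        struct memslot slots[] = {              /* example slots */
                { 0x1000, 0x10000000UL, 512 },
                { 0x2000, 0x10200000UL, 512 },
        };

        /* a 4-page range inside the second slot: the first slot is
           skipped by the clamp, the second is walked exactly once */
        handle_hva_range(slots, 2, 0x10200000UL, 0x10204000UL);
        return 0;
}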
Signed-off-by: Takuya Yoshikawa <[email protected]>
Cc: Alexander Graf <[email protected]>
Cc: Paul Mackerras <[email protected]>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 31 +++++++++++++++++++++-------
arch/x86/kvm/mmu.c | 37 +++++++++++++++++++++++++---------
2 files changed, 50 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3703755..aec8046 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp,
- unsigned long gfn))
+static int kvm_handle_hva_range(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ int (*handler)(struct kvm *kvm,
+ unsigned long *rmapp,
+ unsigned long gfn))
{
int ret;
int retval = 0;
@@ -767,12 +770,17 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- unsigned long start = memslot->userspace_addr;
- unsigned long end;
+ gfn_t gfn;
+ unsigned long hva_start, hva_end;
- end = start + (memslot->npages << PAGE_SHIFT);
- if (hva >= start && hva < end) {
- gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+
+ for (gfn = hva_to_gfn_memslot(hva_start, memslot);
+ gfn < hva_to_gfn_memslot(hva_end, memslot); gfn++) {
gfn_t gfn_offset = gfn - memslot->base_gfn;
ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
@@ -783,6 +791,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return retval;
}
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn))
+{
+ return kvm_handle_hva_range(kvm, hva, hva + PAGE_SIZE, handler);
+}
+
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long gfn)
{
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b898bec..f995b8a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1183,10 +1183,13 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
return 0;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- unsigned long data,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp,
- unsigned long data))
+static int kvm_handle_hva_range(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ unsigned long data,
+ int (*handler)(struct kvm *kvm,
+ unsigned long *rmapp,
+ unsigned long data))
{
int j;
int ret;
@@ -1197,13 +1200,17 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- unsigned long start = memslot->userspace_addr;
- unsigned long end;
+ gfn_t gfn;
+ unsigned long hva_start, hva_end;
- end = start + (memslot->npages << PAGE_SHIFT);
- if (hva >= start && hva < end) {
- gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ for (gfn = hva_to_gfn_memslot(hva_start, memslot);
+ gfn < hva_to_gfn_memslot(hva_end, memslot); gfn++) {
ret = 0;
for (j = PT_PAGE_TABLE_LEVEL;
@@ -1213,7 +1220,9 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
rmapp = __gfn_to_rmap(gfn, j, memslot);
ret |= handler(kvm, rmapp, data);
}
- trace_kvm_age_page(hva, memslot, ret);
+ trace_kvm_age_page(memslot->userspace_addr +
+ (gfn - memslot->base_gfn) * PAGE_SIZE,
+ memslot, ret);
retval |= ret;
}
}
@@ -1221,6 +1230,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return retval;
}
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ unsigned long data,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long data))
+{
+ return kvm_handle_hva_range(kvm, hva, hva + PAGE_SIZE, data, handler);
+}
+
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
--
1.7.5.4
When we tested KVM under memory pressure, with THP enabled on the host,
we noticed that the MMU notifier took a long time to invalidate huge pages.
Since the invalidation was done with mmu_lock held, it not only wasted
CPU time but also made the host less responsive.
This patch mitigates this by using kvm_handle_hva_range().
Signed-off-by: Takuya Yoshikawa <[email protected]>
Cc: Alexander Graf <[email protected]>
Cc: Paul Mackerras <[email protected]>
---
arch/powerpc/include/asm/kvm_host.h | 2 ++
arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++++++
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/mmu.c | 5 +++++
virt/kvm/kvm_main.c | 3 +--
5 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12f..572ad01 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,6 +52,8 @@
struct kvm;
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index aec8046..33a6dc2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -865,6 +865,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ if (kvm->arch.using_mmu_notifiers)
+ kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+ return 0;
+}
+
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long gfn)
{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2..d6ad50d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -930,6 +930,7 @@ extern bool kvm_rebooting;
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f995b8a..0659376 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1243,6 +1243,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 02cb440..44e44c0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
- for (; start < end; start += PAGE_SIZE)
- need_tlb_flush |= kvm_unmap_hva(kvm, start);
+ need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
--
1.7.5.4
This makes it possible to loop over the rmap_pde arrays in the same way as
we do over the rmap array, so that kvm_handle_hva_range() can be optimized
easily in the following patch.
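For illustration, here is a small standalone sketch of how the per-level
arrays end up being sized and indexed, using the same gfn_to_index() formula
as kvm_arch_create_memslot(); the 9-bit-per-level shift is the x86 value, and
the slot size is just an example, so treat this as a model rather than the
kernel code:

#include <stdio.h>

typedef unsigned long long gfn_t;

#define PT_PAGE_TABLE_LEVEL 1
#define KVM_NR_PAGE_SIZES   3                    /* 4K, 2M, 1G on x86 */
#define HPAGE_GFN_SHIFT(l)  (((l) - PT_PAGE_TABLE_LEVEL) * 9)

/* same formula as gfn_to_index() in kvm_host.h */
static unsigned long gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
        return (gfn >> HPAGE_GFN_SHIFT(level)) -
               (base_gfn >> HPAGE_GFN_SHIFT(level));
}

int main(void)
{
        gfn_t base_gfn = 0x100000;               /* example slot */
        unsigned long npages = 1UL << 20;        /* 4GB of guest memory */
        int level;

        for (level = PT_PAGE_TABLE_LEVEL + 1;
             level < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++level) {
                /* number of rmap_pde entries needed for this level,
                   computed the same way kvm_arch_create_memslot() does */
                unsigned long lpages =
                        gfn_to_index(base_gfn + npages - 1, base_gfn, level) + 1;

                printf("level %d: %lu rmap_pde entries\n", level, lpages);
        }
        return 0;
}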
Signed-off-by: Takuya Yoshikawa <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/mmu.c | 6 +++---
arch/x86/kvm/x86.c | 11 +++++++++++
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d6ad50d..54c2378 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -487,11 +487,11 @@ struct kvm_vcpu_arch {
};
struct kvm_lpage_info {
- unsigned long rmap_pde;
int write_count;
};
struct kvm_arch_memory_slot {
+ unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0659376..306711a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -931,13 +931,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
- struct kvm_lpage_info *linfo;
+ unsigned long idx;
if (likely(level == PT_PAGE_TABLE_LEVEL))
return &slot->rmap[gfn - slot->base_gfn];
- linfo = lpage_info_slot(gfn, slot, level);
- return &linfo->rmap_pde;
+ idx = gfn_to_index(gfn, slot->base_gfn, level);
+ return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
}
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6ed5983..e2f41d5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6303,6 +6303,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
+ kvm_kvfree(free->arch.rmap_pde[i]);
+ free->arch.rmap_pde[i] = NULL;
+ }
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
kvm_kvfree(free->arch.lpage_info[i]);
free->arch.lpage_info[i] = NULL;
@@ -6322,6 +6326,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
lpages = gfn_to_index(slot->base_gfn + npages - 1,
slot->base_gfn, level) + 1;
+ slot->arch.rmap_pde[i] =
+ kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
+ if (!slot->arch.rmap_pde[i])
+ goto out_free;
+
slot->arch.lpage_info[i] =
kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
if (!slot->arch.lpage_info[i])
@@ -6350,7 +6359,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ kvm_kvfree(slot->arch.rmap_pde[i]);
kvm_kvfree(slot->arch.lpage_info[i]);
+ slot->arch.rmap_pde[i] = NULL;
slot->arch.lpage_info[i] = NULL;
}
return -ENOMEM;
--
1.7.5.4
When we invalidate a THP page, we call the handler with the same
rmap_pde argument 512 times in the following loop:
for each guest page in the range
for each level
unmap using rmap
This patch avoids these extra handler calls by changing the loop order
like this:
for each level
for each rmap in the range
unmap using rmap
Together with the preceding patches in this series, this made THP page
invalidation more than 5 times faster on our x86 host: as a result, the host
stayed more responsive while the guest's memory was being swapped.
Note: we could not use trace_kvm_age_page() in the new code, so we just
dropped the tracepoint from kvm_handle_hva_range().
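To make the 512x figure concrete, the following standalone model counts
handler invocations for one 2MB-aligned THP range under the old per-page loop
and the new per-level loop. The constants are the x86 ones and the loop bodies
are just counters, so this is only an illustration, not the kernel code:

#include <stdio.h>

#define KVM_NR_PAGE_SIZES 3             /* 4K, 2M, 1G on x86 */
#define PAGES_PER_THP     512           /* one 2MB huge page */
#define LEVEL_SHIFT(l)    ((l) * 9)     /* gfns covered per entry: 512^l */

int main(void)
{
        unsigned long old_calls = 0, new_calls = 0;
        unsigned long gfn;
        int level;

        /* old order: for each page, for each level */
        for (gfn = 0; gfn < PAGES_PER_THP; gfn++)
                for (level = 0; level < KVM_NR_PAGE_SIZES; level++)
                        old_calls++;    /* the 2M and 1G levels see the
                                           same rmap_pde for every gfn */

        /* new order: for each level, for each distinct rmap entry */
        for (level = 0; level < KVM_NR_PAGE_SIZES; level++) {
                unsigned long entries = PAGES_PER_THP >> LEVEL_SHIFT(level);

                if (entries == 0)
                        entries = 1;    /* range smaller than one entry */
                new_calls += entries;
        }

        printf("old: %lu handler calls, new: %lu handler calls\n",
               old_calls, new_calls);   /* prints 1536 vs 514 */
        return 0;
}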
Signed-off-by: Takuya Yoshikawa <[email protected]>
---
arch/x86/kvm/mmu.c | 39 ++++++++++++++++++++-------------------
1 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 306711a..462becb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1192,16 +1192,15 @@ static int kvm_handle_hva_range(struct kvm *kvm,
unsigned long data))
{
int j;
- int ret;
- int retval = 0;
+ int ret = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- gfn_t gfn;
unsigned long hva_start, hva_end;
+ gfn_t gfn_start, gfn_end;
hva_start = max(start, memslot->userspace_addr);
hva_end = min(end, memslot->userspace_addr +
@@ -1209,25 +1208,27 @@ static int kvm_handle_hva_range(struct kvm *kvm,
if (hva_start >= hva_end)
continue;
- for (gfn = hva_to_gfn_memslot(hva_start, memslot);
- gfn < hva_to_gfn_memslot(hva_end, memslot); gfn++) {
- ret = 0;
+ gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end, memslot);
- for (j = PT_PAGE_TABLE_LEVEL;
- j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
- unsigned long *rmapp;
+ for (j = PT_PAGE_TABLE_LEVEL;
+ j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+ unsigned long idx, idx_end;
+ unsigned long *rmapp;
- rmapp = __gfn_to_rmap(gfn, j, memslot);
- ret |= handler(kvm, rmapp, data);
- }
- trace_kvm_age_page(memslot->userspace_addr +
- (gfn - memslot->base_gfn) * PAGE_SIZE,
- memslot, ret);
- retval |= ret;
+ idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+ idx_end = gfn_to_index(gfn_end, memslot->base_gfn, j);
+
+ rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+ /* Handle the first one even if idx == idx_end. */
+ do {
+ ret |= handler(kvm, rmapp++, data);
+ } while (++idx < idx_end);
}
}
- return retval;
+ return ret;
}
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
--
1.7.5.4
On 21.06.2012, at 10:48, Takuya Yoshikawa wrote:
> I added further optimization based on Avi's advice and my rmap_pde work.
> - patch [5-6]
>
> The new test results were impressively good (see below): THP page
> invalidation became more than 5 times faster on my x86 machine.
>
> I also noticed that the GUI's responsiveness on the host improved a lot
> while the guest's memory was being swapped.
>
> Before:
> ...
> 19.852 us | __mmu_notifier_invalidate_range_start();
> 28.033 us | __mmu_notifier_invalidate_range_start();
> 19.066 us | __mmu_notifier_invalidate_range_start();
> 44.715 us | __mmu_notifier_invalidate_range_start();
> 31.613 us | __mmu_notifier_invalidate_range_start();
> 20.659 us | __mmu_notifier_invalidate_range_start();
> 19.979 us | __mmu_notifier_invalidate_range_start();
> 20.416 us | __mmu_notifier_invalidate_range_start();
> 20.632 us | __mmu_notifier_invalidate_range_start();
> 22.316 us | __mmu_notifier_invalidate_range_start();
> ...
>
> After:
> ...
> 4.089 us | __mmu_notifier_invalidate_range_start();
> 4.096 us | __mmu_notifier_invalidate_range_start();
> 3.560 us | __mmu_notifier_invalidate_range_start();
> 3.376 us | __mmu_notifier_invalidate_range_start();
> 3.772 us | __mmu_notifier_invalidate_range_start();
> 3.353 us | __mmu_notifier_invalidate_range_start();
> 3.332 us | __mmu_notifier_invalidate_range_start();
> 3.332 us | __mmu_notifier_invalidate_range_start();
> 3.332 us | __mmu_notifier_invalidate_range_start();
> 3.337 us | __mmu_notifier_invalidate_range_start();
> ...
>
> Takuya Yoshikawa (6):
> KVM: MMU: Use __gfn_to_rmap() to clean up kvm_handle_hva()
> KVM: Introduce hva_to_gfn_memslot() for kvm_handle_hva()
> KVM: MMU: Make kvm_handle_hva() handle range of addresses
> KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start()
> KVM: Separate rmap_pde from kvm_lpage_info->write_count
> KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()
>
> arch/powerpc/include/asm/kvm_host.h | 2 +
> arch/powerpc/kvm/book3s_64_mmu_hv.c | 42 +++++++++++++++-----
> arch/x86/include/asm/kvm_host.h | 3 +-
> arch/x86/kvm/mmu.c | 72 +++++++++++++++++++++++------------
> arch/x86/kvm/x86.c | 13 ++++++-
> include/linux/kvm_host.h | 8 ++++
> virt/kvm/kvm_main.c | 3 +-
> 7 files changed, 104 insertions(+), 39 deletions(-)
Paulus, please review the book3s_hv parts more closely. I didn't spot anything obviously wrong, but rmap magic is still slightly puzzling to me, so me reviewing there doesn't mean much :).
Alex
On Thu, 21 Jun 2012 17:52:38 +0900
Takuya Yoshikawa <[email protected]> wrote:
...
> + /* Handle the first one even if idx == idx_end. */
> + do {
> + ret |= handler(kvm, rmapp++, data);
> + } while (++idx < idx_end);
This is unclear and... maybe wrong.
I will change this as follows:
---
hva_start = max(start, memslot->userspace_addr);
hva_end = min(end, memslot->userspace_addr +
(memslot->npages << PAGE_SHIFT));
if (hva_start >= hva_end)
continue;
/*
* { gfn(page) | page intersects with [hva_start, hva_end) }
* = [gfn_start, gfn_end).
*/
gfn_start = hva_to_gfn_memslot(hva_start, memslot);
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE-1, memslot);
for (j = PT_PAGE_TABLE_LEVEL;
j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
unsigned long idx, idx_end;
unsigned long *rmapp;
/*
* { idx(page_j) | page_j intersects with [hva_start, hva_end) }
* = [idx, idx_end].
*/
idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
for (; idx <= idx_end; ++idx) {
ret |= handler(kvm, rmapp++, data);
}
}
---
This way, much more clearly, we can handle exactly the same rmaps as before:
e.g. for a single-page range, idx == idx_end at PT_DIRECTORY_LEVEL and above,
and the inclusive loop still visits that one rmap_pde, just as the old
per-page code did.
I also need to change the ppc code in patch 3.
Thanks,
Takuya