Hi,
This is the first part of the series which enables dumping of the guest
stage-2 pagetables. The support for dumping the host stage-2 pagetables
which is pKVM specific will be part of a follow-up series as per the
feedback received in v4.
When CONFIG_PTDUMP_STAGE2_DEBUGFS is enabled, ptdump registers
'/sys/debug/kvm/<guest_id>/stage2_page_tables' entry with debugfs
upon guest creation. This allows userspace tools (eg. cat) to dump the
stage-2 pagetables by reading the registered file.
Reading the debugfs file shows stage-2 memory ranges in following format:
<IPA range> <size> <descriptor type> <access permissions> <mem_attributes>
Below is the output of a guest stage-2 pagetable mappings running under
Qemu:
---[ IPA bits 33 start lvl 2 ]---
0x0000000000000000-0x0000000080000000 2G PGD
0x0000000080000000-0x0000000080c00000 12M PGD R W AF BLK
0x0000000080c00000-0x0000000080e00000 2M PGD XN R W AF BLK
0x0000000080e00000-0x0000000081000000 2M PGD R W AF BLK
0x0000000081000000-0x0000000081400000 4M PGD XN R W AF BLK
0x0000000081400000-0x000000008fe00000 234M PGD
0x000000008fe00000-0x0000000090000000 2M PGD XN R W AF BLK
0x0000000090000000-0x00000000fa000000 1696M PGD
0x00000000fa000000-0x00000000fe000000 64M PGD XN R W AF BLK
0x00000000fe000000-0x0000000100000000 32M PGD
0x0000000100000000-0x0000000101c00000 28M PGD XN R W AF BLK
0x0000000101c00000-0x0000000102000000 4M PGD
0x0000000102000000-0x0000000102200000 2M PGD XN R W AF BLK
0x0000000102200000-0x000000017b000000 1934M PGD
0x000000017b000000-0x0000000180000000 80M PGD XN R W AF BLK
Link to v5:
https://lore.kernel.org/all/[email protected]/
Link to v4:
https://lore.kernel.org/all/[email protected]/
Link to v3:
https://lore.kernel.org/all/[email protected]/
Changelog:
v5 -> current:
* don't return an error if the kvm_arch_create_vm_debugfs fails to
initialize (ref.
https://lore.kernel.org/all/[email protected]/)
* fix use-after-free suggested by getting a reference to the
KVM struct while manipulating the debugfs files
and put the reference on the file close.
* do all the allocations at once for the ptdump parser state tracking
and simplify the initialization.
* move the ptdump parser state initialization as part of the file_open
* create separate files for printing the guest stage-2 pagetable
configuration such as: the start level of the pagetable walk and the
number of bits used for the IPA space representation.
* fixed the wrong header format for the newly added file
* include missing patch which hasn't been posted on the v5:
"KVM-arm64-Move-pagetable-definitions-to-common-heade.patch"
v4 -> v5:
* refactorization: split the series into two parts as per the feedback
received from Oliver. Introduce the base support which allows dumping
of the guest stage-2 pagetables.
* removed the *ops* struct wrapper built on top of the file_ops and
simplify the ptdump interface access.
* keep the page table walker away from the ptdump specific code
v3 -> current_version:
* refactorization: moved all the **KVM** specific components under
kvm/ as suggested by Oliver. Introduced a new file
arm64/kvm/ptdump.c which handled the second stage translation.
re-used only the display portion from mm/ptdump.c
* pagetable snapshot creation now uses memory donated from the host.
The memory is no longer shared with the host as this can pose a security
risk if the host has access to manipulate the pagetable copy while
the hypervisor iterates it.
* fixed a memory leak: while memory was used from the memcache for
building the snapshot pagetable, it was no longer giving back the
pages to the host for freeing. A separate array was introduced to
keep track of the pages allocated from the memcache.
v2 -> v3:
* register the stage-2 debugfs entry for the host under
/sys/debug/kvm/host_stage2_page_tables and in
/sys/debug/kvm/<guest_id>/stage2_page_tables for guests.
* don't use a static array for parsing the attributes description,
generate it dynamically based on the number of pagetable levels
* remove the lock that was guarding the seq_file private inode data,
and keep the data private to the open file session.
* minor fixes & renaming of CONFIG_NVHE_EL2_PTDUMP_DEBUGFS to
CONFIG_PTDUMP_STAGE2_DEBUGFS
v1 -> v2:
* use the stage-2 pagetable walker for dumping descriptors instead of
the one provided by ptdump.
* support for guests pagetables dumping under VHE/nVHE non-protected
Thanks,
Sebastian Ene (6):
KVM: arm64: Move pagetable definitions to common header
arm64: ptdump: Expose the attribute parsing functionality
arm64: ptdump: Use the mask from the state structure
KVM: arm64: Register ptdump with debugfs on guest creation
KVM: arm64: Initialize the ptdump parser with stage-2 attributes
KVM: arm64: Expose guest stage-2 pagetable config to debugfs
arch/arm64/include/asm/kvm_pgtable.h | 42 +++++
arch/arm64/include/asm/ptdump.h | 42 ++++-
arch/arm64/kvm/Kconfig | 13 ++
arch/arm64/kvm/Makefile | 1 +
arch/arm64/kvm/debug.c | 8 +
arch/arm64/kvm/hyp/pgtable.c | 42 -----
arch/arm64/kvm/kvm_ptdump.h | 20 ++
arch/arm64/kvm/ptdump.c | 270 +++++++++++++++++++++++++++
arch/arm64/mm/ptdump.c | 49 +----
9 files changed, 405 insertions(+), 82 deletions(-)
create mode 100644 arch/arm64/kvm/kvm_ptdump.h
create mode 100644 arch/arm64/kvm/ptdump.c
--
2.44.0.rc0.258.g7320e95886-goog
To keep the same output format as the arch specific ptdump and for the
sake of reusability, move the parser's state tracking code out
of the arch specific.
Signed-off-by: Sebastian Ene <[email protected]>
---
arch/arm64/include/asm/ptdump.h | 41 ++++++++++++++++++++++++++++++++-
arch/arm64/mm/ptdump.c | 36 ++---------------------------
2 files changed, 42 insertions(+), 35 deletions(-)
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 581caac525b0..23510be35084 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -9,6 +9,8 @@
#include <linux/mm_types.h>
#include <linux/seq_file.h>
+#include <linux/ptdump.h>
+
struct addr_marker {
unsigned long start_address;
@@ -21,15 +23,52 @@ struct ptdump_info {
unsigned long base_addr;
};
+struct prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+struct pg_level {
+ const struct prot_bits *bits;
+ const char *name;
+ size_t num;
+ u64 mask;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ unsigned long start_address;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
+};
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val);
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
-#endif
+#endif /* CONFIG_PTDUMP_DEBUGFS */
void ptdump_check_wx(void);
+#else
+static inline void note_page(void *pt_st, unsigned long addr,
+ int level, u64 val) { }
#endif /* CONFIG_PTDUMP_CORE */
#ifdef CONFIG_DEBUG_WX
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index e305b6593c4e..64127c70b109 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -66,31 +66,6 @@ static struct addr_marker address_markers[] = {
seq_printf(m, fmt); \
})
-/*
- * The page dumper groups page table entries of the same type into a single
- * description. It uses pg_state to track the range information while
- * iterating over the pte entries. When the continuity is broken it then
- * dumps out a description of the range.
- */
-struct pg_state {
- struct ptdump_state ptdump;
- struct seq_file *seq;
- const struct addr_marker *marker;
- unsigned long start_address;
- int level;
- u64 current_prot;
- bool check_wx;
- unsigned long wx_pages;
- unsigned long uxn_pages;
-};
-
-struct prot_bits {
- u64 mask;
- u64 val;
- const char *set;
- const char *clear;
-};
-
static const struct prot_bits pte_bits[] = {
{
.mask = PTE_VALID,
@@ -170,13 +145,6 @@ static const struct prot_bits pte_bits[] = {
}
};
-struct pg_level {
- const struct prot_bits *bits;
- const char *name;
- size_t num;
- u64 mask;
-};
-
static struct pg_level pg_level[] = {
{ /* pgd */
.name = "PGD",
@@ -248,8 +216,8 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- u64 val)
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
static const char units[] = "KMGTPE";
--
2.44.0.rc0.258.g7320e95886-goog