2021-07-08 16:51:29

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()

Use DEFINE_SHOW_ATTRIBUTE() instead of open coding
open() and fops.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/ptdump/bats.c | 14 ++------------
arch/powerpc/mm/ptdump/hashpagetable.c | 12 +-----------
arch/powerpc/mm/ptdump/ptdump.c | 13 +------------
arch/powerpc/mm/ptdump/segment_regs.c | 12 +-----------
4 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index c4c628b03cf8..4ed3418f07d9 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -57,7 +57,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool

#define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)

-static int bats_show_603(struct seq_file *m, void *v)
+static int bats_show(struct seq_file *m, void *v)
{
seq_puts(m, "---[ Instruction Block Address Translation ]---\n");

@@ -88,17 +88,7 @@ static int bats_show_603(struct seq_file *m, void *v)
return 0;
}

-static int bats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, bats_show_603, NULL);
-}
-
-static const struct file_operations bats_fops = {
- .open = bats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(bats);

static int __init bats_init(void)
{
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index ad6df9a2e7c8..c7f824d294b2 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -526,17 +526,7 @@ static int ptdump_show(struct seq_file *m, void *v)
return 0;
}

-static int ptdump_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);

static int ptdump_init(void)
{
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 5062c58b1e5b..349fd8fe173f 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -397,18 +397,7 @@ static int ptdump_show(struct seq_file *m, void *v)
return 0;
}

-
-static int ptdump_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);

static void build_pgtable_complete_mask(void)
{
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
index 565048a0c9be..3054944d3d7e 100644
--- a/arch/powerpc/mm/ptdump/segment_regs.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -41,17 +41,7 @@ static int sr_show(struct seq_file *m, void *v)
return 0;
}

-static int sr_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sr_show, NULL);
-}
-
-static const struct file_operations sr_fops = {
- .open = sr_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sr);

static int __init sr_init(void)
{
--
2.25.0


2021-07-08 16:51:39

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 3/4] powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level

Do the same as commit f8f0d0b6fa20 ("mm: ptdump: reduce level numbers
by 1 in note_page()") and add missing p4d level.

This will align powerpc to the users of generic ptdump.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/ptdump/8xx.c | 6 ++++--
arch/powerpc/mm/ptdump/book3s64.c | 6 ++++--
arch/powerpc/mm/ptdump/ptdump.c | 17 +++++++++--------
arch/powerpc/mm/ptdump/shared.c | 6 ++++--
4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 86da2a669680..fac932eb8f9a 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -75,8 +75,10 @@ static const struct flag_info flag_array[] = {
};

struct pgtable_level pg_level[5] = {
- {
- }, { /* pgd */
+ { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* p4d */
.flag = flag_array,
.num = ARRAY_SIZE(flag_array),
}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
index 14f73868db66..5ad92d9dc5d1 100644
--- a/arch/powerpc/mm/ptdump/book3s64.c
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -103,8 +103,10 @@ static const struct flag_info flag_array[] = {
};

struct pgtable_level pg_level[5] = {
- {
- }, { /* pgd */
+ { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* p4d */
.flag = flag_array,
.num = ARRAY_SIZE(flag_array),
}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 3eb8732641da..fb531bc64fc5 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -58,7 +58,7 @@ struct pg_state {
const struct addr_marker *marker;
unsigned long start_address;
unsigned long start_pa;
- unsigned int level;
+ int level;
u64 current_flags;
bool check_wx;
unsigned long wx_pages;
@@ -188,10 +188,9 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}

-static void note_page_update_state(struct pg_state *st, unsigned long addr,
- unsigned int level, u64 val)
+static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
{
- u64 flag = val & pg_level[level].mask;
+ u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
u64 pa = val & PTE_RPN_MASK;

st->level = level;
@@ -206,12 +205,12 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
}

static void note_page(struct pg_state *st, unsigned long addr,
- unsigned int level, u64 val, unsigned long page_size)
+ int level, u64 val, unsigned long page_size)
{
- u64 flag = val & pg_level[level].mask;
+ u64 flag = level >= 0 ? val & pg_level[level].mask : 0;

/* At first no level is set */
- if (!st->level) {
+ if (st->level == -1) {
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
note_page_update_state(st, addr, level, val);
/*
@@ -383,6 +382,7 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
+ .level = -1,
.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};

@@ -393,7 +393,7 @@ static int ptdump_show(struct seq_file *m, void *v)

/* Traverse kernel page tables */
walk_pagetables(&st);
- note_page(&st, 0, 0, 0, 0);
+ note_page(&st, 0, -1, 0, 0);
return 0;
}

@@ -415,6 +415,7 @@ void ptdump_check_wx(void)
struct pg_state st = {
.seq = NULL,
.marker = address_markers,
+ .level = -1,
.check_wx = true,
.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index c005fe041c18..03607ab90c66 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -68,8 +68,10 @@ static const struct flag_info flag_array[] = {
};

struct pgtable_level pg_level[5] = {
- {
- }, { /* pgd */
+ { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* p4d */
.flag = flag_array,
.num = ARRAY_SIZE(flag_array),
}, { /* pud */
--
2.25.0

2021-07-08 16:53:49

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

This patch converts powerpc to the generic PTDUMP implementation.

Signed-off-by: Christophe Leroy <[email protected]>
---
v4: Reworked init of ptdump range
---
arch/powerpc/Kconfig | 2 +
arch/powerpc/Kconfig.debug | 30 -------
arch/powerpc/mm/Makefile | 2 +-
arch/powerpc/mm/mmu_decl.h | 2 +-
arch/powerpc/mm/ptdump/Makefile | 9 +-
arch/powerpc/mm/ptdump/ptdump.c | 146 ++++++++------------------------
6 files changed, 47 insertions(+), 144 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0104345d0a65..dc1ab533a1cf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -123,6 +123,7 @@ config PPC
select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
+ select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES
select ARCH_HAS_ELF_RANDOMIZE
@@ -182,6 +183,7 @@ config PPC
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_PCI_IOMAP if PCI
+ select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 205cd77f321f..192f0ed0097f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -365,36 +365,6 @@ config FAIL_IOMMU

If you are unsure, say N.

-config PPC_PTDUMP
- bool "Export kernel pagetable layout to userspace via debugfs"
- depends on DEBUG_KERNEL && DEBUG_FS
- help
- This option exports the state of the kernel pagetables to a
- debugfs file. This is only useful for kernel developers who are
- working in architecture specific areas of the kernel - probably
- not a good idea to enable this feature in a production kernel.
-
- If you are unsure, say N.
-
-config PPC_DEBUG_WX
- bool "Warn on W+X mappings at boot"
- depends on PPC_PTDUMP && STRICT_KERNEL_RWX
- help
- Generate a warning if any W+X mappings are found at boot.
-
- This is useful for discovering cases where the kernel is leaving
- W+X mappings after applying NX, as such mappings are a security risk.
-
- Note that even if the check fails, your kernel is possibly
- still fine, as W+X mappings are not a security hole in
- themselves, what they do is that they make the exploitation
- of other unfixed kernel bugs easier.
-
- There is no runtime or memory usage effect of this option
- once the kernel has booted up - it's a one time check.
-
- If in doubt, say "Y".
-
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
depends on DEBUG_KERNEL && PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index eae4ec2988fc..df8172da2301 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,5 +18,5 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
-obj-$(CONFIG_PPC_PTDUMP) += ptdump/
+obj-$(CONFIG_PTDUMP_CORE) += ptdump/
obj-$(CONFIG_KASAN) += kasan/
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7dac910c0b21..dd1cabc2ea0f 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -180,7 +180,7 @@ static inline void mmu_mark_rodata_ro(void) { }
void __init mmu_mapin_immr(void);
#endif

-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
void ptdump_check_wx(void);
#else
static inline void ptdump_check_wx(void) { }
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index 712762be3cb1..4050cbb55acf 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -5,5 +5,10 @@ obj-y += ptdump.o
obj-$(CONFIG_4xx) += shared.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += shared.o
-obj-$(CONFIG_PPC_BOOK3S_32) += shared.o bats.o segment_regs.o
-obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o hashpagetable.o
+obj-$(CONFIG_PPC_BOOK3S_32) += shared.o
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o
+
+ifdef CONFIG_PTDUMP_DEBUGFS
+obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o
+obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o
+endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index fb531bc64fc5..2d80d775d15e 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -16,6 +16,7 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+#include <linux/ptdump.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
@@ -54,6 +55,7 @@
*
*/
struct pg_state {
+ struct ptdump_state ptdump;
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
@@ -102,6 +104,11 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};

+static struct ptdump_range ptdump_range[] __ro_after_init = {
+ {TASK_SIZE_MAX, ~0UL},
+ {0, 0}
+};
+
#define pt_dump_seq_printf(m, fmt, args...) \
({ \
if (m) \
@@ -204,10 +211,10 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, int
}
}

-static void note_page(struct pg_state *st, unsigned long addr,
- int level, u64 val, unsigned long page_size)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
{
u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);

/* At first no level is set */
if (st->level == -1) {
@@ -245,94 +252,6 @@ static void note_page(struct pg_state *st, unsigned long addr,
}
}

-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
-{
- pte_t *pte = pte_offset_kernel(pmd, 0);
- unsigned long addr;
- unsigned int i;
-
- for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
- addr = start + i * PAGE_SIZE;
- note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE);
-
- }
-}
-
-static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start,
- int pdshift, int level)
-{
-#ifdef CONFIG_ARCH_HAS_HUGEPD
- unsigned int i;
- int shift = hugepd_shift(*phpd);
- int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1;
-
- if (start & ((1 << shift) - 1))
- return;
-
- for (i = 0; i < ptrs_per_hpd; i++) {
- unsigned long addr = start + (i << shift);
- pte_t *pte = hugepte_offset(*phpd, addr, pdshift);
-
- note_page(st, addr, level + 1, pte_val(*pte), 1 << shift);
- }
-#endif
-}
-
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
-{
- pmd_t *pmd = pmd_offset(pud, 0);
- unsigned long addr;
- unsigned int i;
-
- for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
- addr = start + i * PMD_SIZE;
- if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
- /* pmd exists */
- walk_pte(st, pmd, addr);
- else
- note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE);
- }
-}
-
-static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
-{
- pud_t *pud = pud_offset(p4d, 0);
- unsigned long addr;
- unsigned int i;
-
- for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
- addr = start + i * PUD_SIZE;
- if (!pud_none(*pud) && !pud_is_leaf(*pud))
- /* pud exists */
- walk_pmd(st, pud, addr);
- else
- note_page(st, addr, 2, pud_val(*pud), PUD_SIZE);
- }
-}
-
-static void walk_pagetables(struct pg_state *st)
-{
- unsigned int i;
- unsigned long addr = st->start_address & PGDIR_MASK;
- pgd_t *pgd = pgd_offset_k(addr);
-
- /*
- * Traverse the linux pagetable structure and dump pages that are in
- * the hash pagetable.
- */
- for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
- p4d_t *p4d = p4d_offset(pgd, 0);
-
- if (p4d_none(*p4d) || p4d_is_leaf(*p4d))
- note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE);
- else if (is_hugepd(__hugepd(p4d_val(*p4d))))
- walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1);
- else
- /* p4d exists */
- walk_pud(st, p4d, addr);
- }
-}
-
static void populate_markers(void)
{
int i = 0;
@@ -383,17 +302,14 @@ static int ptdump_show(struct seq_file *m, void *v)
.seq = m,
.marker = address_markers,
.level = -1,
- .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+ .ptdump = {
+ .note_page = note_page,
+ .range = ptdump_range,
+ }
};

-#ifdef CONFIG_PPC64
- if (!radix_enabled())
- st.start_address = KERN_VIRT_START;
-#endif
-
/* Traverse kernel page tables */
- walk_pagetables(&st);
- note_page(&st, 0, -1, 0, 0);
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
return 0;
}

@@ -409,23 +325,24 @@ static void build_pgtable_complete_mask(void)
pg_level[i].mask |= pg_level[i].flag[j].mask;
}

-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
void ptdump_check_wx(void)
{
struct pg_state st = {
.seq = NULL,
- .marker = address_markers,
+ .marker = (struct addr_marker[]) {
+ { 0, NULL},
+ { -1, NULL},
+ },
.level = -1,
.check_wx = true,
- .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+ .ptdump = {
+ .note_page = note_page,
+ .range = ptdump_range,
+ }
};

-#ifdef CONFIG_PPC64
- if (!radix_enabled())
- st.start_address = KERN_VIRT_START;
-#endif
-
- walk_pagetables(&st);
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);

if (st.wx_pages)
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
@@ -435,12 +352,21 @@ void ptdump_check_wx(void)
}
#endif

-static int ptdump_init(void)
+static int __init ptdump_init(void)
{
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
+ ptdump_range[0].start = KERN_VIRT_START;
+ else
+ ptdump_range[0].start = PAGE_OFFSET;
+#endif
+
populate_markers();
build_pgtable_complete_mask();
- debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
- &ptdump_fops);
+
+ if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
+ debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+
return 0;
}
device_initcall(ptdump_init);
--
2.25.0

2021-08-27 13:24:54

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()

On Thu, 8 Jul 2021 16:49:40 +0000 (UTC), Christophe Leroy wrote:
> Use DEFINE_SHOW_ATTRIBUTE() instead of open coding
> open() and fops.
>
>
>
>

Applied to powerpc/next.

[1/4] powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE()
https://git.kernel.org/powerpc/c/11f27a7fa4ca27935de74e3eb052bdc430d5f8d8
[2/4] powerpc/ptdump: Remove unused 'page_size' parameter
https://git.kernel.org/powerpc/c/64b87b0c70e0fd28352895cba3c0a9631e0072dd
[3/4] powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level
https://git.kernel.org/powerpc/c/cf98d2b6eea6a1b2c43f85680ad58fcc3ea9496b
[4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP
https://git.kernel.org/powerpc/c/e084728393a58e7fdafeee2e6b20e0faff09b557

cheers

2021-08-29 19:02:14

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

Hi Christophe,

On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
> This patch converts powerpc to the generic PTDUMP implementation.
>
> Signed-off-by: Christophe Leroy <[email protected]>

This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
config [1] when booting up in QEMU with [2]:

[ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
[ 1.623058] Faulting instruction address: 0xc00000000045e5fc
[ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
[ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
[ 1.625015] Modules linked in:
[ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
[ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
[ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
[ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
[ 1.628449] CFAR: c000000000518300 IRQMASK: 0
[ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
[ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
[ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
[ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
[ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
[ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
[ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
[ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
[ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
[ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
[ 1.635755] Call Trace:
[ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
[ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
[ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
[ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
[ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
[ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
[ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[ 1.640597] Instruction dump:
[ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
[ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
[ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
[ 1.643220]
[ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

This is not compiler specific, I can reproduce it with GCC 11.2.0 and
binutils 2.37. If there is any additional information I can provide,
please let me know.

[1]: https://src.fedoraproject.org/rpms/kernel/raw/rawhide/f/kernel-ppc64le-fedora.config
[2]: https://github.com/ClangBuiltLinux/boot-utils

Cheers,
Nathan

2021-08-29 19:13:49

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

Hi Nathan,

Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
> Hi Christophe,
>
> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>> This patch converts powerpc to the generic PTDUMP implementation.
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>
> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
> config [1] when booting up in QEMU with [2]:
>
> [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
> [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
> [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
> [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> [ 1.625015] Modules linked in:
> [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
> [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
> [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
> [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
> [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
> [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
> [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
> [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
> [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
> [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
> [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
> [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
> [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
> [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
> [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
> [ 1.635755] Call Trace:
> [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
> [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
> [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
> [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
> [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
> [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
> [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
> [ 1.640597] Instruction dump:
> [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
> [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
> [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
> [ 1.643220]
> [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>
> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
> binutils 2.37. If there is any additional information I can provide,
> please let me know.

Can you provide a disassembly of __walk_page_range()? Or provide your vmlinux binary.

Thanks
Christophe


>
> [1]: https://src.fedoraproject.org/rpms/kernel/raw/rawhide/f/kernel-ppc64le-fedora.config
> [2]: https://github.com/ClangBuiltLinux/boot-utils
>
> Cheers,
> Nathan
>

2021-08-29 21:40:31

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

On Sun, Aug 29, 2021 at 09:11:47PM +0200, Christophe Leroy wrote:
> Hi Nathan,
>
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
> > Hi Christophe,
> >
> > On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
> > > This patch converts powerpc to the generic PTDUMP implementation.
> > >
> > > Signed-off-by: Christophe Leroy <[email protected]>
> >
> > This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
> > GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
> > config [1] when booting up in QEMU with [2]:
> >
> > [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
> > [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
> > [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
> > [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> > [ 1.625015] Modules linked in:
> > [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
> > [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
> > [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
> > [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
> > [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
> > [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
> > [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
> > [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
> > [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
> > [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
> > [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
> > [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
> > [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
> > [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
> > [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
> > [ 1.635755] Call Trace:
> > [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
> > [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
> > [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
> > [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
> > [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
> > [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
> > [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
> > [ 1.640597] Instruction dump:
> > [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
> > [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
> > [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
> > [ 1.643220]
> > [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> > [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
> >
> > This is not compiler specific, I can reproduce it with GCC 11.2.0 and
> > binutils 2.37. If there is any additional information I can provide,
> > please let me know.
>
> Can you provide a disassembly of __walk_page_range()? Or provide your vmlinux binary.

Sure thing!

Disassembly of mm/pagewalk.o: https://gist.github.com/2cc2cadc598fe55b0f5cea0d75e89186

vmlinux binary (zstd compressed, 123MB): https://1drv.ms/u/s!AsQNYeB-IEbqjjai5EiHUBiPYzI3?e=kqUwpN

Cheers,
Nathan

2021-08-30 07:54:49

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

Christophe Leroy <[email protected]> writes:
> Hi Nathan,
>
> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>> Hi Christophe,
>>
>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>
>>> Signed-off-by: Christophe Leroy <[email protected]>
>>
>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>> config [1] when booting up in QEMU with [2]:
>>
>> [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>> [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
>> [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>> [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>> [ 1.625015] Modules linked in:
>> [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>> [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>> [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
>> [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
>> [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
>> [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>> [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>> [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>> [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>> [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>> [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>> [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>> [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>> [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>> [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>> [ 1.635755] Call Trace:
>> [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>> [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>> [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>> [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>> [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>> [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>> [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>> [ 1.640597] Instruction dump:
>> [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>> [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>> [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
>> [ 1.643220]
>> [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>> [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>
>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>> binutils 2.37. If there is any additional information I can provide,
>> please let me know.
>
> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.

It seems to be walking off the end of the pgd.

[ 3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
[ 3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000 <- end of pgd at PAGE_OFFSET + 4PB
[ 3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
[ 3.373957] walk_p4d_range: addr c010008000000000 end c010010000000000
[ 3.374009] walk_p4d_range: addr c010010000000000 end c010018000000000
[ 3.374060] walk_p4d_range: addr c010018000000000 end c010020000000000
[ 3.376727] walk_p4d_range: addr c010020000000000 end c010028000000000
[ 3.376780] walk_p4d_range: addr c010028000000000 end c010030000000000
[ 3.376831] walk_p4d_range: addr c010030000000000 end c010038000000000
[ 3.376883] walk_p4d_range: addr c010038000000000 end c010040000000000
[ 3.376935] walk_p4d_range: addr c010040000000000 end c010048000000000
[ 3.376988] walk_p4d_range: addr c010048000000000 end c010050000000000
[ 3.377039] walk_p4d_range: addr c010050000000000 end c010058000000000
[ 3.377091] walk_p4d_range: addr c010058000000000 end c010060000000000
[ 3.377143] walk_p4d_range: addr c010060000000000 end c010068000000000
[ 3.377244] walk_pud_range: addr c010060000000000 end c010068000000000
[ 3.377374] walk_pmd_range: addr c010060100000000 end c010060140000000
[ 3.377817] BUG: Unable to handle kernel data access on read at 0xf906a038d8ba8400
[ 3.378247] Faulting instruction address: 0xc00000000045b4a4
[ 3.378725] Oops: Kernel access of bad area, sig: 11 [#1]
[ 3.378843] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
[ 3.379118] Modules linked in:
[ 3.379422] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc2+ #75
[ 3.379751] NIP: c00000000045b4a4 LR: c00000000045b430 CTR: c000000000b4b580
[ 3.379833] REGS: c0000000085637c0 TRAP: 0300 Not tainted (5.14.0-rc2+)
[ 3.379940] MSR: 8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR: 8800228f XER: 20040000
[ 3.380284] CFAR: c0000000001f5744 DAR: f906a038d8ba8400 DSISR: 40000000 IRQMASK: 0
[ 3.380284] GPR00: c00000000045b430 c000000008563a60 c0000000028a7d00 000000000000003a
[ 3.380284] GPR04: 00000000ffffe14d c000000008563748 ffffffffffffffff 00000000000001ff
[ 3.380284] GPR08: f906a038d8ba8400 c0100601001fffff c01006013fffffff fffffffffffc51e0
[ 3.380284] GPR12: 0000000000002000 c0000000ffffee80 0000000000000001 c000000008563be0
[ 3.380284] GPR16: c000000001198118 c0000000028daef8 c000000002971a60 c0000000014bc868
[ 3.380284] GPR20: c010060100200000 c000000002971a58 c000000002971a68 c010060100000000
[ 3.380284] GPR24: 0000000000000003 c000000001198118 c000000001198118 c000000001000020
[ 3.380284] GPR28: 0000000000000000 c010060140000000 c010068000000000 f906a038d8ba8400
[ 3.381235] NIP [c00000000045b4a4] __walk_page_range+0x7f4/0xbd0
[ 3.381906] LR [c00000000045b430] __walk_page_range+0x780/0xbd0
[ 3.382120] Call Trace:
[ 3.382240] [c000000008563a60] [c00000000045b430] __walk_page_range+0x780/0xbd0 (unreliable)
[ 3.382445] [c000000008563bc0] [c00000000045ba94] walk_page_range_novma+0x74/0xb0
[ 3.382548] [c000000008563c10] [c000000000514cd8] ptdump_walk_pgd+0x98/0x170
[ 3.382630] [c000000008563c60] [c0000000000aaf70] ptdump_check_wx+0xb0/0x100
[ 3.382774] [c000000008563d40] [c00000000008db18] mark_rodata_ro+0x48/0x80
[ 3.382849] [c000000008563da0] [c000000000012a18] kernel_init+0x78/0x1c0
[ 3.382926] [c000000008563e10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
[ 3.383092] Instruction dump:
[ 3.383341] 78c8f00e 7fe84a14 e9360000 e94100a8 39290010 7dc94836 7e89ba14 7d2900d0
[ 3.383516] 7e944838 3934ffff 7c295040 40800098 <e93f0000> 2c290000 40820094 e9990028
[ 3.384126] ---[ end trace d8e6479034d7a9d1 ]---

cheers

2021-08-30 08:18:07

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP



Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
> Christophe Leroy <[email protected]> writes:
>> Hi Nathan,
>>
>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>> Hi Christophe,
>>>
>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>
>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>
>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>> config [1] when booting up in QEMU with [2]:
>>>
>>> [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>> [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
>>> [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>> [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>> [ 1.625015] Modules linked in:
>>> [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>> [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>> [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
>>> [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
>>> [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
>>> [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>> [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>> [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>> [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>> [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>> [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>> [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>> [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>> [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>> [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>> [ 1.635755] Call Trace:
>>> [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>> [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>> [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>> [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>> [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>> [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>> [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>> [ 1.640597] Instruction dump:
>>> [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>> [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>> [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>> [ 1.643220]
>>> [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>> [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>
>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>> binutils 2.37. If there is any additional information I can provide,
>>> please let me know.
>>
>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>
> It seems to be walking of the end of the pgd.
>
> [ 3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
> [ 3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000 <- end of pgd at PAGE_OFFSET + 4PB
> [ 3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000

Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

static struct ptdump_range ptdump_range[] __ro_after_init = {
{TASK_SIZE_MAX, ~0UL},
{0, 0}
};


Ok, well, ppc32 goes up to 0xffffffff

What's the top address to be used for ppc64 ?

2021-08-30 11:56:30

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

Christophe Leroy <[email protected]> writes:
> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>> Christophe Leroy <[email protected]> writes:
>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>
>>>>
>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>> config [1] when booting up in QEMU with [2]:
>>>>
>>>> [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>> [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>> [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>> [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>> [ 1.625015] Modules linked in:
>>>> [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>> [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>> [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
>>>> [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
>>>> [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>> [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>> [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>> [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>> [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>> [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>> [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>> [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>> [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>> [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>> [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>> [ 1.635755] Call Trace:
>>>> [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>> [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>> [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>> [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>> [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>> [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>> [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>> [ 1.640597] Instruction dump:
>>>> [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>> [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>> [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>> [ 1.643220]
>>>> [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>> [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>
>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>> binutils 2.37. If there is any additional information I can provide,
>>>> please let me know.
>>>
>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>
>> It seems to be walking of the end of the pgd.
>>
>> [ 3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>> [ 3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000 <- end of pgd at PAGE_OFFSET + 4PB
>> [ 3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>
> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)

But the page table doesn't span that far? 0_o

> static struct ptdump_range ptdump_range[] __ro_after_init = {
> {TASK_SIZE_MAX, ~0UL},
> {0, 0}
> };
>
> Ok, well, ppc32 go up to 0xffffffff
>
> What's the top address to be used for ppc64 ?

It's different for (hash | radix) x page size.

The below works, and matches what we used to do.

Possibly we can come up with something cleaner, not sure.

cheers


diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 2d80d775d15e..3d3778a74969 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -359,6 +359,8 @@ static int __init ptdump_init(void)
ptdump_range[0].start = KERN_VIRT_START;
else
ptdump_range[0].start = PAGE_OFFSET;
+
+ ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
#endif

populate_markers();

2021-08-30 13:17:08

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP



Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
> Christophe Leroy <[email protected]> writes:
>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>> Christophe Leroy <[email protected]> writes:
>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>
>>>>>
>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>> config [1] when booting up in QEMU with [2]:
>>>>>
>>>>> [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>> [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>> [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>> [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>> [ 1.625015] Modules linked in:
>>>>> [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>> [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>> [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
>>>>> [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
>>>>> [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>> [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>> [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>> [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>> [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>> [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>> [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>> [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>> [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>> [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>> [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>> [ 1.635755] Call Trace:
>>>>> [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>> [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>> [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>> [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>> [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>> [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>> [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>> [ 1.640597] Instruction dump:
>>>>> [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>> [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>> [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>> [ 1.643220]
>>>>> [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>> [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>
>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>> please let me know.
>>>>
>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>
>>> It seems to be walking of the end of the pgd.
>>>
>>> [ 3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>> [ 3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000 <- end of pgd at PAGE_OFFSET + 4PB
>>> [ 3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>
>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
>
> But the page table doesn't span that far? 0_o
>
>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>> {TASK_SIZE_MAX, ~0UL},
>> {0, 0}
>> };
>>
>> Ok, well, ppc32 go up to 0xffffffff
>>
>> What's the top address to be used for ppc64 ?
>
> It's different for (hash | radix) x page size.
>
> The below works, and matches what we used to do.
>
> Possibly we can come up with something cleaner, not sure.
>
> cheers
>
>
> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
> index 2d80d775d15e..3d3778a74969 100644
> --- a/arch/powerpc/mm/ptdump/ptdump.c
> +++ b/arch/powerpc/mm/ptdump/ptdump.c
> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
> ptdump_range[0].start = KERN_VIRT_START;
> else
> ptdump_range[0].start = PAGE_OFFSET;
> +
> + ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);

Hum ...

It was:

for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {

And there is

#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))


Do we have the following ?

pgd_index(KERN_VIRT_START) == 0


Shouldn't it be something like

ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);


Christophe

2021-08-31 13:51:29

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v4 4/4] powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP

Christophe Leroy <[email protected]> writes:
> Le 30/08/2021 à 13:55, Michael Ellerman a écrit :
>> Christophe Leroy <[email protected]> writes:
>>> Le 30/08/2021 à 09:52, Michael Ellerman a écrit :
>>>> Christophe Leroy <[email protected]> writes:
>>>>> Le 29/08/2021 à 20:55, Nathan Chancellor a écrit :
>>>>>> On Thu, Jul 08, 2021 at 04:49:43PM +0000, Christophe Leroy wrote:
>>>>>>> This patch converts powerpc to the generic PTDUMP implementation.
>>>>>>>
>>>>>>
>>>>>> This patch as commit e084728393a5 ("powerpc/ptdump: Convert powerpc to
>>>>>> GENERIC_PTDUMP") in powerpc/next causes a panic with Fedora's ppc64le
>>>>>> config [1] when booting up in QEMU with [2]:
>>>>>>
>>>>>> [ 1.621864] BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000
>>>>>> [ 1.623058] Faulting instruction address: 0xc00000000045e5fc
>>>>>> [ 1.623832] Oops: Kernel access of bad area, sig: 11 [#1]
>>>>>> [ 1.624318] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
>>>>>> [ 1.625015] Modules linked in:
>>>>>> [ 1.625463] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc7-next-20210827 #16
>>>>>> [ 1.626237] NIP: c00000000045e5fc LR: c00000000045e580 CTR: c000000000518220
>>>>>> [ 1.626839] REGS: c00000000752b820 TRAP: 0380 Not tainted (5.14.0-rc7-next-20210827)
>>>>>> [ 1.627528] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 84002482 XER: 20000000
>>>>>> [ 1.628449] CFAR: c000000000518300 IRQMASK: 0
>>>>>> [ 1.628449] GPR00: c00000000045e580 c00000000752bac0 c0000000028a9300 0000000000000000
>>>>>> [ 1.628449] GPR04: c200800000000000 ffffffffffffffff 000000000000000a 0000000000000001
>>>>>> [ 1.628449] GPR08: c0eeff7f00000000 0000000000000012 0000000000000000 0000000000000000
>>>>>> [ 1.628449] GPR12: 0000000000000000 c000000002b20000 fffffffffffffffe c000000002971a70
>>>>>> [ 1.628449] GPR16: c000000002960040 c0000000011a8f98 c00000000752bbf0 ffffffffffffffff
>>>>>> [ 1.628449] GPR20: c2008fffffffffff c0eeff7f00000000 c000000002971a68 c00a0003ff000000
>>>>>> [ 1.628449] GPR24: c000000002971a78 0000000000000002 0000000000000001 c0000000011a8f98
>>>>>> [ 1.628449] GPR28: c0000000011a8f98 c0000000028daef8 c200800000000000 c200900000000000
>>>>>> [ 1.634090] NIP [c00000000045e5fc] __walk_page_range+0x2bc/0xce0
>>>>>> [ 1.635117] LR [c00000000045e580] __walk_page_range+0x240/0xce0
>>>>>> [ 1.635755] Call Trace:
>>>>>> [ 1.636018] [c00000000752bac0] [c00000000045e580] __walk_page_range+0x240/0xce0 (unreliable)
>>>>>> [ 1.636811] [c00000000752bbd0] [c00000000045f234] walk_page_range_novma+0x74/0xb0
>>>>>> [ 1.637459] [c00000000752bc20] [c000000000518448] ptdump_walk_pgd+0x98/0x170
>>>>>> [ 1.638138] [c00000000752bc70] [c0000000000aa988] ptdump_check_wx+0x88/0xd0
>>>>>> [ 1.638738] [c00000000752bd50] [c00000000008d6d8] mark_rodata_ro+0x48/0x80
>>>>>> [ 1.639299] [c00000000752bdb0] [c000000000012a34] kernel_init+0x74/0x1a0
>>>>>> [ 1.639842] [c00000000752be10] [c00000000000cfd4] ret_from_kernel_thread+0x5c/0x64
>>>>>> [ 1.640597] Instruction dump:
>>>>>> [ 1.641021] 38e7ffff 39490010 7ce707b4 7fca5436 79081564 7d4a3838 7908f082 794a1f24
>>>>>> [ 1.641740] 78a8f00e 30e6ffff 7ea85214 7ce73110 <7d48502a> 78f90fa4 2c2a0000 39290010
>>>>>> [ 1.642771] ---[ end trace 6cf72b085097ad52 ]---
>>>>>> [ 1.643220]
>>>>>> [ 2.644228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>>>>>> [ 2.645523] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>>>>>>
>>>>>> This is not compiler specific, I can reproduce it with GCC 11.2.0 and
>>>>>> binutils 2.37. If there is any additional information I can provide,
>>>>>> please let me know.
>>>>>
>>>>> Can you provide a dissassembly of __walk_page_range() ? Or provide your vmlinux binary.
>>>>
>>>> It seems to be walking of the end of the pgd.
>>>>
>>>> [ 3.373800] walk_p4d_range: addr c00fff0000000000 end c00fff8000000000
>>>> [ 3.373852] walk_p4d_range: addr c00fff8000000000 end c010000000000000 <- end of pgd at PAGE_OFFSET + 4PB
>>>> [ 3.373905] walk_p4d_range: addr c010000000000000 end c010008000000000
>>>
>>> Yes, I want it to walk from TASK_SIZE_MAX up to 0xffffffffffffffff :)
>>
>> But the page table doesn't span that far? 0_o
>>
>>> static struct ptdump_range ptdump_range[] __ro_after_init = {
>>> {TASK_SIZE_MAX, ~0UL},
>>> {0, 0}
>>> };
>>>
>>> Ok, well, ppc32 go up to 0xffffffff
>>>
>>> What's the top address to be used for ppc64 ?
>>
>> It's different for (hash | radix) x page size.
>>
>> The below works, and matches what we used to do.
>>
>> Possibly we can come up with something cleaner, not sure.
>>
>> cheers
>>
>>
>> diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
>> index 2d80d775d15e..3d3778a74969 100644
>> --- a/arch/powerpc/mm/ptdump/ptdump.c
>> +++ b/arch/powerpc/mm/ptdump/ptdump.c
>> @@ -359,6 +359,8 @@ static int __init ptdump_init(void)
>> ptdump_range[0].start = KERN_VIRT_START;
>> else
>> ptdump_range[0].start = PAGE_OFFSET;
>> +
>> + ptdump_range[0].end = ptdump_range[0].start + (PGDIR_SIZE * PTRS_PER_PGD);
>
> Hum ...
>
> It was:
>
> for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
>
> And there is
>
> #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

Yes you're right.

> Do we have the following ?
>
> pgd_index(KERN_VIRT_START) == 0

No.

Since 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range")

It's:

pgd_index(PAGE_OFFSET) == 0


> Shouldn't it be something like
>
> ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);

Yep.

And we should also change the start address for hash to be PAGE_OFFSET.
Even though we don't expect anything in the page tables between
PAGE_OFFSET and KERN_VIRT_START, it's still good to check that range.

cheers