2019-06-07 06:03:49

by Anup Patel

[permalink] [raw]
Subject: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

Currently, the setup_vm() does initial page table setup in one-shot
very early before enabling MMU. Due to this, the setup_vm() has to map
all possible kernel virtual addresses since it does not know size and
location of RAM. This means we have kernel mappings for non-existent
RAM, and any buggy driver (or kernel) code doing out-of-bounds access
to RAM will not fault and will instead cause nondeterministic behaviour.

Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
516 KB) of memory for initial page tables which is never freed. The
memory required for initial page tables will further increase if
we choose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000).

This patch implements two-staged initial page table setup, as follows:
1. Early (i.e. setup_vm()): This stage maps the kernel image and DTB in
an early page table (i.e. early_pg_dir). The early_pg_dir will be used
only by the boot HART, so it can be freed as part of init memory free-up.
2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
banks in the final page table (i.e. swapper_pg_dir). The boot HART
will start using swapper_pg_dir at the end of setup_vm_final(). All
non-boot HARTs directly use the swapper_pg_dir created by boot HART.

We have the following advantages with this new approach:
1. Kernel mappings for non-existent RAM don't exist anymore.
2. Memory consumed by initial page tables is now independent of the
chosen PAGE_OFFSET.
3. Memory consumed by initial page tables on an RV64 system is 2 pages
(i.e. 8 KB), which is a significant reduction, and these pages will be
freed as part of the init memory free-up.

The patch also provides a foundation for implementing strict kernel
mappings where we protect kernel text and rodata using PTE permissions.

Suggested-by: Mike Rapoport <[email protected]>
Signed-off-by: Anup Patel <[email protected]>
---
arch/riscv/include/asm/fixmap.h | 5 +
arch/riscv/include/asm/pgtable-64.h | 5 +
arch/riscv/include/asm/pgtable.h | 8 +
arch/riscv/kernel/head.S | 17 +-
arch/riscv/kernel/setup.c | 6 +-
arch/riscv/mm/init.c | 317 ++++++++++++++++++++++------
6 files changed, 285 insertions(+), 73 deletions(-)

diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index c207f6634b91..9c66033c3a54 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -21,6 +21,11 @@
*/
enum fixed_addresses {
FIX_HOLE,
+#define FIX_FDT_SIZE SZ_1M
+ FIX_FDT_END,
+ FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
+ FIX_PTE,
+ FIX_PMD,
FIX_EARLYCON_MEM_BASE,
__end_of_fixed_addresses
};
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 7aa0ea9bd8bb..56ecc3dc939d 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -78,6 +78,11 @@ static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}

+static inline unsigned long _pmd_pfn(pmd_t pmd)
+{
+ return pmd_val(pmd) >> _PAGE_PFN_SHIFT;
+}
+
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 1141364d990e..367554dfa4db 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -67,6 +67,8 @@
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)

+#define PAGE_TABLE __pgprot(_PAGE_TABLE)
+
extern pgd_t swapper_pg_dir[];

/* MAP_PRIVATE permissions: xwr (copy-on-write) */
@@ -127,6 +129,11 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}

+static inline unsigned long _pgd_pfn(pgd_t pgd)
+{
+ return pgd_val(pgd) >> _PAGE_PFN_SHIFT;
+}
+
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

/* Locate an entry in the page global directory */
@@ -404,6 +411,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define kern_addr_valid(addr) (1) /* FIXME */
#endif

+extern void *dtb_early_va;
extern void setup_bootmem(void);
extern void paging_init(void);

diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 370c66ce187a..d3d10d3d25a3 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -63,7 +63,9 @@ clear_bss_done:

/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
+ mv a0, s1
call setup_vm
+ la a0, early_pg_dir
call relocate

/* Restore C environment */
@@ -72,25 +74,23 @@ clear_bss_done:
la sp, init_thread_union + THREAD_SIZE

/* Start the kernel */
- mv a0, s1
call parse_dtb
tail start_kernel

relocate:
/* Relocate return address */
li a1, PAGE_OFFSET
- la a0, _start
- sub a1, a1, a0
+ la a2, _start
+ sub a1, a1, a2
add ra, ra, a1

/* Point stvec to virtual address of intruction after satp write */
- la a0, 1f
- add a0, a0, a1
- csrw CSR_STVEC, a0
+ la a2, 1f
+ add a2, a2, a1
+ csrw CSR_STVEC, a2

/* Compute satp for kernel page tables, but don't load it yet */
- la a2, swapper_pg_dir
- srl a2, a2, PAGE_SHIFT
+ srl a2, a0, PAGE_SHIFT
li a1, SATP_MODE
or a2, a2, a1

@@ -156,6 +156,7 @@ relocate:
fence

/* Enable virtual memory and relocate to virtual address */
+ la a0, swapper_pg_dir
call relocate

tail smp_callin
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b92e6831d1ec..a990a6cb184f 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -39,11 +39,9 @@ struct screen_info screen_info = {
atomic_t hart_lottery;
unsigned long boot_cpu_hartid;

-void __init parse_dtb(phys_addr_t dtb_phys)
+void __init parse_dtb(void)
{
- void *dtb = __va(dtb_phys);
-
- if (early_init_dt_scan(dtb))
+ if (early_init_dt_scan(dtb_early_va))
return;

pr_err("No DTB passed to the kernel\n");
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 1879501bd156..22d55e289d8f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1,14 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2012 Regents of the University of California
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
*/

#include <linux/init.h>
@@ -49,13 +42,6 @@ void setup_zero_page(void)
memset((void *)empty_zero_page, 0, PAGE_SIZE);
}

-void __init paging_init(void)
-{
- setup_zero_page();
- local_flush_tlb_all();
- zone_sizes_init();
-}
-
void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
@@ -156,17 +142,15 @@ EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);

+void *dtb_early_va;
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
-pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
+static bool mmu_enabled;

-#ifndef __PAGETABLE_PMD_FOLDED
-#define NUM_SWAPPER_PMDS ((uintptr_t)-PAGE_OFFSET >> PGDIR_SHIFT)
-pmd_t swapper_pmd[PTRS_PER_PMD*((-PAGE_OFFSET)/PGDIR_SIZE)] __page_aligned_bss;
-pmd_t trampoline_pmd[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
+#define MAX_EARLY_MAPPING_SIZE SZ_128M

-pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
+pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);

void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
@@ -185,6 +169,156 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
}
}

+static pte_t *__init get_pte_virt(phys_addr_t pa)
+{
+ if (mmu_enabled) {
+ clear_fixmap(FIX_PTE);
+ return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
+ } else {
+ return (pte_t *)((uintptr_t)pa);
+ }
+}
+
+static phys_addr_t __init alloc_pte(uintptr_t va)
+{
+ /*
+ * We only create PMD or PGD early mappings so we
+ * should never reach here with MMU disabled.
+ */
+ BUG_ON(!mmu_enabled);
+
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static void __init create_pte_mapping(pte_t *ptep,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ uintptr_t pte_index = pte_index(va);
+
+ BUG_ON(sz != PAGE_SIZE);
+
+ if (pte_none(ptep[pte_index]))
+ ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot);
+}
+
+#ifndef __PAGETABLE_PMD_FOLDED
+
+pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
+pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
+
+#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
+#define NUM_EARLY_PMDS 1UL
+#else
+#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
+#endif
+pmd_t early_pmd[PTRS_PER_PMD*NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
+
+static pmd_t *__init get_pmd_virt(phys_addr_t pa)
+{
+ if (mmu_enabled) {
+ clear_fixmap(FIX_PMD);
+ return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+ } else {
+ return (pmd_t *)((uintptr_t)pa);
+ }
+}
+
+static phys_addr_t __init alloc_pmd(uintptr_t va)
+{
+ uintptr_t pmd_num;
+
+ if (mmu_enabled)
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+
+ pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
+ BUG_ON(pmd_num >= NUM_EARLY_PMDS);
+ return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
+}
+
+static void __init create_pmd_mapping(pmd_t *pmdp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pte_t *ptep;
+ phys_addr_t pte_phys;
+ uintptr_t pmd_index = pmd_index(va);
+
+ if (sz == PMD_SIZE) {
+ if (pmd_none(pmdp[pmd_index]))
+ pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pmd_none(pmdp[pmd_index])) {
+ pte_phys = alloc_pte(va);
+ pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
+ ptep = get_pte_virt(pte_phys);
+ memset(ptep, 0, PAGE_SIZE);
+ } else {
+ pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index]));
+ ptep = get_pte_virt(pte_phys);
+ }
+
+ create_pte_mapping(ptep, va, pa, sz, prot);
+}
+
+#define pgd_next_t pmd_t
+#define alloc_pgd_next(__va) alloc_pmd(__va)
+#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
+#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
+#define PTE_PARENT_SIZE PMD_SIZE
+#define fixmap_pgd_next fixmap_pmd
+#else
+#define pgd_next_t pte_t
+#define alloc_pgd_next(__va) alloc_pte(__va)
+#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
+#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
+#define PTE_PARENT_SIZE PGDIR_SIZE
+#define fixmap_pgd_next fixmap_pte
+#endif
+
+static void __init create_pgd_mapping(pgd_t *pgdp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pgd_next_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pgd_index = pgd_index(va);
+
+ if (sz == PGDIR_SIZE) {
+ if (pgd_val(pgdp[pgd_index]) == 0)
+ pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pgd_val(pgdp[pgd_index]) == 0) {
+ next_phys = alloc_pgd_next(va);
+ pgdp[pgd_index] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = get_pgd_next_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index]));
+ nextp = get_pgd_next_virt(next_phys);
+ }
+
+ create_pgd_next_mapping(nextp, va, pa, sz, prot);
+}
+
+static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
+{
+ uintptr_t map_size = PAGE_SIZE;
+
+ /* Upgrade to PMD/PGDIR mappings whenever possible */
+ if (!(base & (PTE_PARENT_SIZE - 1)) &&
+ !(size & (PTE_PARENT_SIZE - 1)))
+ map_size = PTE_PARENT_SIZE;
+
+ return map_size;
+}
+
/*
* setup_vm() is called from head.S with MMU-off.
*
@@ -204,54 +338,115 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
"not use absolute addressing."
#endif

-asmlinkage void __init setup_vm(void)
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
- uintptr_t i;
- uintptr_t pa = (uintptr_t) &_start;
- pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC);
+ uintptr_t va, end_va;
+ uintptr_t load_pa = (uintptr_t)(&_start);
+ uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
+ uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+
+ va_pa_offset = PAGE_OFFSET - load_pa;
+ pfn_base = PFN_DOWN(load_pa);

- va_pa_offset = PAGE_OFFSET - pa;
- pfn_base = PFN_DOWN(pa);
+ /*
+ * Enforce boot alignment requirements of RV32 and
+ * RV64 by only allowing PMD or PGD mappings.
+ */
+ BUG_ON(map_size == PAGE_SIZE);

/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
- BUG_ON((pa % (PAGE_SIZE * PTRS_PER_PTE)) != 0);
+ BUG_ON((load_pa % map_size) != 0);
+ BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
+
+ /* Setup early PGD for fixmap */
+ create_pgd_mapping(early_pg_dir, FIXADDR_START,
+ (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);

#ifndef __PAGETABLE_PMD_FOLDED
- trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] =
- pfn_pgd(PFN_DOWN((uintptr_t)trampoline_pmd),
- __pgprot(_PAGE_TABLE));
- trampoline_pmd[0] = pfn_pmd(PFN_DOWN(pa), prot);
+ /* Setup fixmap PMD */
+ create_pmd_mapping(fixmap_pmd, FIXADDR_START,
+ (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
+ /* Setup trampoline PGD and PMD */
+ create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+ create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
+ load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
+#else
+ /* Setup trampoline PGD */
+ create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
+#endif

- for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) {
- size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i;
+ /*
+ * Setup early PGD covering entire kernel which will allows
+ * us to reach paging_init(). We map all memory banks later
+ * in setup_vm_final() below.
+ */
+ end_va = PAGE_OFFSET + load_sz;
+ for (va = PAGE_OFFSET; va < end_va; va += map_size)
+ create_pgd_mapping(early_pg_dir, va,
+ load_pa + (va - PAGE_OFFSET),
+ map_size, PAGE_KERNEL_EXEC);
+
+ /* Create fixed mapping for early FDT parsing */
+ end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
+ for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
+ create_pte_mapping(fixmap_pte, va,
+ dtb_pa + (va - __fix_to_virt(FIX_FDT)),
+ PAGE_SIZE, PAGE_KERNEL);
+
+ /* Save pointer to DTB for early FDT parsing */
+ dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
+}

- swapper_pg_dir[o] =
- pfn_pgd(PFN_DOWN((uintptr_t)swapper_pmd) + i,
- __pgprot(_PAGE_TABLE));
- }
- for (i = 0; i < ARRAY_SIZE(swapper_pmd); i++)
- swapper_pmd[i] = pfn_pmd(PFN_DOWN(pa + i * PMD_SIZE), prot);
-
- swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] =
- pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pmd),
- __pgprot(_PAGE_TABLE));
- fixmap_pmd[(FIXADDR_START >> PMD_SHIFT) % PTRS_PER_PMD] =
- pfn_pmd(PFN_DOWN((uintptr_t)fixmap_pte),
- __pgprot(_PAGE_TABLE));
-#else
- trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] =
- pfn_pgd(PFN_DOWN(pa), prot);
+static void __init setup_vm_final(void)
+{
+ uintptr_t va, map_size;
+ phys_addr_t pa, start, end;
+ struct memblock_region *reg;
+
+ /* Set mmu_enabled flag */
+ mmu_enabled = true;

- for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) {
- size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i;
+ /* Setup swapper PGD for fixmap */
+ create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
+ __pa(fixmap_pgd_next),
+ PGDIR_SIZE, PAGE_TABLE);

- swapper_pg_dir[o] =
- pfn_pgd(PFN_DOWN(pa + i * PGDIR_SIZE), prot);
+ /* Map all memory banks */
+ for_each_memblock(memory, reg) {
+ start = reg->base;
+ end = start + reg->size;
+
+ if (start >= end)
+ break;
+ if (memblock_is_nomap(reg))
+ continue;
+ if (start <= __pa(PAGE_OFFSET) &&
+ __pa(PAGE_OFFSET) < end)
+ start = __pa(PAGE_OFFSET);
+
+ map_size = best_map_size(start, end - start);
+ for (pa = start; pa < end; pa += map_size) {
+ va = (uintptr_t)__va(pa);
+ create_pgd_mapping(swapper_pg_dir, va, pa,
+ map_size, PAGE_KERNEL_EXEC);
+ }
}

- swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] =
- pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pte),
- __pgprot(_PAGE_TABLE));
-#endif
+ /* Clear fixmap PTE and PMD mappings */
+ clear_fixmap(FIX_PTE);
+ clear_fixmap(FIX_PMD);
+
+ /* Move to swapper page table */
+ csr_write(sptbr, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
+ local_flush_tlb_all();
+}
+
+void __init paging_init(void)
+{
+ setup_vm_final();
+ setup_zero_page();
+ zone_sizes_init();
}
--
2.17.1


2019-07-11 00:07:41

by Paul Walmsley

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Fri, 7 Jun 2019, Anup Patel wrote:

> Currently, the setup_vm() does initial page table setup in one-shot
> very early before enabling MMU. Due to this, the setup_vm() has to map
> all possible kernel virtual addresses since it does not know size and
> location of RAM. This means we have kernel mappings for non-existent
> RAM and any buggy driver (or kernel) code doing out-of-bound access
> to RAM will not fault and cause underterministic behaviour.
>
> Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
> RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
> MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
> 516 KB) of memory for initial page tables which is never freed. The
> memory required for initial page tables will further increase if
> we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)
>
> This patch implements two-staged initial page table setup, as follows:
> 1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
> a early page table (i.e. early_pg_dir). The early_pg_dir will be used
> only by boot HART so it can be freed as-part of init memory free-up.
> 2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
> banks in the final page table (i.e. swapper_pg_dir). The boot HART
> will start using swapper_pg_dir at the end of setup_vm_final(). All
> non-boot HARTs directly use the swapper_pg_dir created by boot HART.
>
> We have following advantages with this new approach:
> 1. Kernel mappings for non-existent RAM don't exists anymore.
> 2. Memory consumed by initial page tables is now indpendent of the
> chosen PAGE_OFFSET.
> 3. Memory consumed by initial page tables on RV64 system is 2 pages
> (i.e. 8 KB) which has significantly reduced and these pages will be
> freed as-part of the init memory free-up.
>
> The patch also provides a foundation for implementing strict kernel
> mappings where we protect kernel text and rodata using PTE permissions.
>
> Suggested-by: Mike Rapoport <[email protected]>
> Signed-off-by: Anup Patel <[email protected]>

Thanks, updated to apply and to fix a checkpatch warning, and queued.

This may not make it in for v5.3-rc1; if not, we'll submit it later.


- Paul

2019-08-15 20:11:54

by David Abdurachmanov

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Thu, Aug 15, 2019 at 11:57 AM Alistair Francis
<[email protected]> wrote:
>
> On Wed, 2019-07-10 at 17:05 -0700, Paul Walmsley wrote:
> > On Fri, 7 Jun 2019, Anup Patel wrote:
> >
> > > Currently, the setup_vm() does initial page table setup in one-shot
> > > very early before enabling MMU. Due to this, the setup_vm() has to
> > > map
> > > all possible kernel virtual addresses since it does not know size
> > > and
> > > location of RAM. This means we have kernel mappings for non-
> > > existent
> > > RAM and any buggy driver (or kernel) code doing out-of-bound access
> > > to RAM will not fault and cause underterministic behaviour.
> > >
> > > Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
> > > RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
> > > MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
> > > 516 KB) of memory for initial page tables which is never freed. The
> > > memory required for initial page tables will further increase if
> > > we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)
> > >
> > > This patch implements two-staged initial page table setup, as
> > > follows:
> > > 1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
> > > a early page table (i.e. early_pg_dir). The early_pg_dir will be
> > > used
> > > only by boot HART so it can be freed as-part of init memory free-
> > > up.
> > > 2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
> > > banks in the final page table (i.e. swapper_pg_dir). The boot HART
> > > will start using swapper_pg_dir at the end of setup_vm_final(). All
> > > non-boot HARTs directly use the swapper_pg_dir created by boot
> > > HART.
> > >
> > > We have following advantages with this new approach:
> > > 1. Kernel mappings for non-existent RAM don't exists anymore.
> > > 2. Memory consumed by initial page tables is now indpendent of the
> > > chosen PAGE_OFFSET.
> > > 3. Memory consumed by initial page tables on RV64 system is 2 pages
> > > (i.e. 8 KB) which has significantly reduced and these pages will be
> > > freed as-part of the init memory free-up.
> > >
> > > The patch also provides a foundation for implementing strict kernel
> > > mappings where we protect kernel text and rodata using PTE
> > > permissions.
> > >
> > > Suggested-by: Mike Rapoport <[email protected]>
> > > Signed-off-by: Anup Patel <[email protected]>
> >
> > Thanks, updated to apply and to fix a checkpatch warning, and
> > queued.
> >
> > This may not make it in for v5.3-rc1; if not, we'll submit it later.
>
> I'm seeing this failure on RV32 which I bisected to this patch:
>
> [ 1.820461] systemd[1]: systemd 242-19-gdb2e367+ running in system
> mode. (-PAM -AUDIT -SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP
> -LIBCRYPTSETUP -GCRYPT -GNUTLS +ACL +XZ -LZ4 -SECCOMP +BLKID -ELFUTILS
> +KMOD -IDN2 -IDN -PCRE2 default-hierarchy=hybrid)
> [ 1.824320] Unable to handle kernel paging request at virtual
> address 9ff00c15
> [ 1.824973] Oops [#1]
> [ 1.825162] Modules linked in:
> [ 1.825536] CPU: 0 PID: 1 Comm: systemd Not tainted 5.2.0-rc7 #1
> [ 1.826039] sepc: c05c3c78 ra : c04b5a74 sp : df047ce0
> [ 1.826514] gp : c07a1038 tp : df04c000 t0 : 000000fc
> [ 1.826919] t1 : 00000002 t2 : 000003ef s0 : df047cf0
> [ 1.827322] s1 : df7090f8 a0 : 9ff00c15 a1 : c072166c
> [ 1.827723] a2 : 00000000 a3 : 00000001 a4 : 00000001
> [ 1.828104] a5 : df6f8138 a6 : 0000002f a7 : de62a000
> [ 1.828534] s2 : c072166c s3 : 00000000 s4 : 00000000
> [ 1.828931] s5 : c07a2000 s6 : 00400cc0 s7 : 00000400
> [ 1.829319] s8 : de491018 s9 : 00000000 s10: fffff000
> [ 1.829702] s11: de491030 t3 : de62b000 t4 : 00000000
> [ 1.830090] t5 : 00000000 t6 : 00000080
> [ 1.830392] sstatus: 00000100 sbadaddr: 9ff00c15 scause: 0000000d
> [ 1.831616] ---[ end trace 49a926a1a5300c00 ]---
> [ 1.835776] Kernel panic - not syncing: Attempted to kill init!
> exitcode=0x0000000b
> [ 1.836575] ---[ end Kernel panic - not syncing: Attempted to kill
> init! exitcode=0x0000000b ]---
>
> Does anyone else see this?
>
> A simple revert of this patch on 5.3-rc4 fixes the issue for me.

Yes, I do see those in Fedora/RISCV build farm every morning, but with
riscv64 and 5.2.0-rc7 kernel.

You also seem to run 5.2.0-rc7 kernel.

fedora-riscv-4 login: [178876.406122] Unable to handle kernel paging
request at virtual address 0000000000012a28
fedora-riscv-7 login: [17983.074847] Unable to handle kernel paging
request at virtual address 0fffffdff5e14700

david

2019-08-15 21:03:15

by Paul Walmsley

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Thu, 15 Aug 2019, David Abdurachmanov wrote:

> Yes, I do see those in Fedora/RISCV build farm every morning, but with
> riscv64 and 5.2.0-rc7 kernel.

[...]

> fedora-riscv-4 login: [178876.406122] Unable to handle kernel paging
> request at virtual address 0000000000012a28
> fedora-riscv-7 login: [17983.074847] Unable to handle kernel paging
> request at virtual address 0fffffdff5e14700

Alistair, you're seeing panics immediately after the userspace transition,
right? 100% of the time?

If so, this is probably a different bug. Most likely the TLB flushing
issue.


- Paul

2019-08-15 21:42:23

by Alistair Francis

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Wed, 2019-07-10 at 17:05 -0700, Paul Walmsley wrote:
> On Fri, 7 Jun 2019, Anup Patel wrote:
>
> > Currently, the setup_vm() does initial page table setup in one-shot
> > very early before enabling MMU. Due to this, the setup_vm() has to
> > map
> > all possible kernel virtual addresses since it does not know size
> > and
> > location of RAM. This means we have kernel mappings for non-
> > existent
> > RAM and any buggy driver (or kernel) code doing out-of-bound access
> > to RAM will not fault and cause underterministic behaviour.
> >
> > Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
> > RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
> > MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
> > 516 KB) of memory for initial page tables which is never freed. The
> > memory required for initial page tables will further increase if
> > we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)
> >
> > This patch implements two-staged initial page table setup, as
> > follows:
> > 1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
> > a early page table (i.e. early_pg_dir). The early_pg_dir will be
> > used
> > only by boot HART so it can be freed as-part of init memory free-
> > up.
> > 2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
> > banks in the final page table (i.e. swapper_pg_dir). The boot HART
> > will start using swapper_pg_dir at the end of setup_vm_final(). All
> > non-boot HARTs directly use the swapper_pg_dir created by boot
> > HART.
> >
> > We have following advantages with this new approach:
> > 1. Kernel mappings for non-existent RAM don't exists anymore.
> > 2. Memory consumed by initial page tables is now indpendent of the
> > chosen PAGE_OFFSET.
> > 3. Memory consumed by initial page tables on RV64 system is 2 pages
> > (i.e. 8 KB) which has significantly reduced and these pages will be
> > freed as-part of the init memory free-up.
> >
> > The patch also provides a foundation for implementing strict kernel
> > mappings where we protect kernel text and rodata using PTE
> > permissions.
> >
> > Suggested-by: Mike Rapoport <[email protected]>
> > Signed-off-by: Anup Patel <[email protected]>
>
> Thanks, updated to apply and to fix a checkpatch warning, and
> queued.
>
> This may not make it in for v5.3-rc1; if not, we'll submit it later.

I'm seeing this failure on RV32 which I bisected to this patch:

[ 1.820461] systemd[1]: systemd 242-19-gdb2e367+ running in system
mode. (-PAM -AUDIT -SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP
-LIBCRYPTSETUP -GCRYPT -GNUTLS +ACL +XZ -LZ4 -SECCOMP +BLKID -ELFUTILS
+KMOD -IDN2 -IDN -PCRE2 default-hierarchy=hybrid)
[ 1.824320] Unable to handle kernel paging request at virtual
address 9ff00c15
[ 1.824973] Oops [#1]
[ 1.825162] Modules linked in:
[ 1.825536] CPU: 0 PID: 1 Comm: systemd Not tainted 5.2.0-rc7 #1
[ 1.826039] sepc: c05c3c78 ra : c04b5a74 sp : df047ce0
[ 1.826514] gp : c07a1038 tp : df04c000 t0 : 000000fc
[ 1.826919] t1 : 00000002 t2 : 000003ef s0 : df047cf0
[ 1.827322] s1 : df7090f8 a0 : 9ff00c15 a1 : c072166c
[ 1.827723] a2 : 00000000 a3 : 00000001 a4 : 00000001
[ 1.828104] a5 : df6f8138 a6 : 0000002f a7 : de62a000
[ 1.828534] s2 : c072166c s3 : 00000000 s4 : 00000000
[ 1.828931] s5 : c07a2000 s6 : 00400cc0 s7 : 00000400
[ 1.829319] s8 : de491018 s9 : 00000000 s10: fffff000
[ 1.829702] s11: de491030 t3 : de62b000 t4 : 00000000
[ 1.830090] t5 : 00000000 t6 : 00000080
[ 1.830392] sstatus: 00000100 sbadaddr: 9ff00c15 scause: 0000000d
[ 1.831616] ---[ end trace 49a926a1a5300c00 ]---
[ 1.835776] Kernel panic - not syncing: Attempted to kill init!
exitcode=0x0000000b
[ 1.836575] ---[ end Kernel panic - not syncing: Attempted to kill
init! exitcode=0x0000000b ]---

Does anyone else see this?

A simple revert of this patch on 5.3-rc4 fixes the issue for me.

Alistair

>
>
> - Paul
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv

2019-08-15 23:13:40

by Alistair Francis

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Thu, 2019-08-15 at 12:07 -0700, David Abdurachmanov wrote:
> On Thu, Aug 15, 2019 at 11:57 AM Alistair Francis
> <[email protected]> wrote:
> > On Wed, 2019-07-10 at 17:05 -0700, Paul Walmsley wrote:
> > > On Fri, 7 Jun 2019, Anup Patel wrote:
> > >
> > > > Currently, the setup_vm() does initial page table setup in one-
> > > > shot
> > > > very early before enabling MMU. Due to this, the setup_vm() has
> > > > to
> > > > map
> > > > all possible kernel virtual addresses since it does not know
> > > > size
> > > > and
> > > > location of RAM. This means we have kernel mappings for non-
> > > > existent
> > > > RAM and any buggy driver (or kernel) code doing out-of-bound
> > > > access
> > > > to RAM will not fault and cause underterministic behaviour.
> > > >
> > > > Further, the setup_vm() creates PMD mappings (i.e. 2M mappings)
> > > > for
> > > > RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000
> > > > (i.e.
> > > > MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages
> > > > (i.e.
> > > > 516 KB) of memory for initial page tables which is never freed.
> > > > The
> > > > memory required for initial page tables will further increase
> > > > if
> > > > we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)
> > > >
> > > > This patch implements two-staged initial page table setup, as
> > > > follows:
> > > > 1. Early (i.e. setup_vm()): This stage maps kernel image and
> > > > DTB in
> > > > a early page table (i.e. early_pg_dir). The early_pg_dir will
> > > > be
> > > > used
> > > > only by boot HART so it can be freed as-part of init memory
> > > > free-
> > > > up.
> > > > 2. Final (i.e. setup_vm_final()): This stage maps all possible
> > > > RAM
> > > > banks in the final page table (i.e. swapper_pg_dir). The boot
> > > > HART
> > > > will start using swapper_pg_dir at the end of setup_vm_final().
> > > > All
> > > > non-boot HARTs directly use the swapper_pg_dir created by boot
> > > > HART.
> > > >
> > > > We have following advantages with this new approach:
> > > > 1. Kernel mappings for non-existent RAM don't exists anymore.
> > > > 2. Memory consumed by initial page tables is now indpendent of
> > > > the
> > > > chosen PAGE_OFFSET.
> > > > 3. Memory consumed by initial page tables on RV64 system is 2
> > > > pages
> > > > (i.e. 8 KB) which has significantly reduced and these pages
> > > > will be
> > > > freed as-part of the init memory free-up.
> > > >
> > > > The patch also provides a foundation for implementing strict
> > > > kernel
> > > > mappings where we protect kernel text and rodata using PTE
> > > > permissions.
> > > >
> > > > Suggested-by: Mike Rapoport <[email protected]>
> > > > Signed-off-by: Anup Patel <[email protected]>
> > >
> > > Thanks, updated to apply and to fix a checkpatch warning, and
> > > queued.
> > >
> > > This may not make it in for v5.3-rc1; if not, we'll submit it
> > > later.
> >
> > I'm seeing this failure on RV32 which I bisected to this patch:
> >
> > [ 1.820461] systemd[1]: systemd 242-19-gdb2e367+ running in
> > system
> > mode. (-PAM -AUDIT -SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP
> > -LIBCRYPTSETUP -GCRYPT -GNUTLS +ACL +XZ -LZ4 -SECCOMP +BLKID
> > -ELFUTILS
> > +KMOD -IDN2 -IDN -PCRE2 default-hierarchy=hybrid)
> > [ 1.824320] Unable to handle kernel paging request at virtual
> > address 9ff00c15
> > [ 1.824973] Oops [#1]
> > [ 1.825162] Modules linked in:
> > [ 1.825536] CPU: 0 PID: 1 Comm: systemd Not tainted 5.2.0-rc7 #1
> > [ 1.826039] sepc: c05c3c78 ra : c04b5a74 sp : df047ce0
> > [ 1.826514] gp : c07a1038 tp : df04c000 t0 : 000000fc
> > [ 1.826919] t1 : 00000002 t2 : 000003ef s0 : df047cf0
> > [ 1.827322] s1 : df7090f8 a0 : 9ff00c15 a1 : c072166c
> > [ 1.827723] a2 : 00000000 a3 : 00000001 a4 : 00000001
> > [ 1.828104] a5 : df6f8138 a6 : 0000002f a7 : de62a000
> > [ 1.828534] s2 : c072166c s3 : 00000000 s4 : 00000000
> > [ 1.828931] s5 : c07a2000 s6 : 00400cc0 s7 : 00000400
> > [ 1.829319] s8 : de491018 s9 : 00000000 s10: fffff000
> > [ 1.829702] s11: de491030 t3 : de62b000 t4 : 00000000
> > [ 1.830090] t5 : 00000000 t6 : 00000080
> > [ 1.830392] sstatus: 00000100 sbadaddr: 9ff00c15 scause:
> > 0000000d
> > [ 1.831616] ---[ end trace 49a926a1a5300c00 ]---
> > [ 1.835776] Kernel panic - not syncing: Attempted to kill init!
> > exitcode=0x0000000b
> > [ 1.836575] ---[ end Kernel panic - not syncing: Attempted to
> > kill
> > init! exitcode=0x0000000b ]---
> >
> > Does anyone else see this?
> >
> > A simple revert of this patch on 5.3-rc4 fixes the issue for me.
>
> Yes, I do see those in Fedora/RISCV build farm every morning, but
> with
> riscv64 and 5.2.0-rc7 kernel.
>
> You also seem to run 5.2.0-rc7 kernel.

That is just a copy error as I copied the log from my bisect to paste
it into my commit. I can reproduce this on 5.3-rc4 as well.

Alistair

>
> fedora-riscv-4 login: [178876.406122] Unable to handle kernel paging
> request at virtual address 0000000000012a28
> fedora-riscv-7 login: [17983.074847] Unable to handle kernel paging
> request at virtual address 0fffffdff5e14700
>
> david

2019-08-15 23:38:28

by Alistair Francis

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Thu, 2019-08-15 at 13:29 -0700, Paul Walmsley wrote:
> On Thu, 15 Aug 2019, David Abdurachmanov wrote:
>
> > Yes, I do see those in Fedora/RISCV build farm every morning, but
> > with
> > riscv64 and 5.2.0-rc7 kernel.
>
> [...]
>
> > fedora-riscv-4 login: [178876.406122] Unable to handle kernel
> > paging
> > request at virtual address 0000000000012a28
> > fedora-riscv-7 login: [17983.074847] Unable to handle kernel paging
> > request at virtual address 0fffffdff5e14700
>
> Alistair, you're seeing panics immediately after the userspace
> transition,
> right? 100% of the time?

Yes, just after init (systemd) is started. I see this 100% of the time
with 32-bit RISC-V.

Here is an updated log with a little more context:

[ 1.227072] EXT4-fs (vda): mounted filesystem with ordered data
mode. Opts: (null)
[ 1.228148] VFS: Mounted root (ext4 filesystem) on device 254:0.
[ 1.274486] Freeing unused kernel memory: 192K
[ 1.274788] This architecture does not have kernel memory
protection.
[ 1.275298] Run /sbin/init as init process
[ 1.682749] systemd[1]: systemd 242-19-gdb2e367+ running in system
mode. (-PAM -AUDIT -SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP
-LIBCRYPTSETUP -GCRYPT -GNUTLS +ACL +XZ -LZ4 -SECCOMP +BLKID -ELFUTILS
+KMOD -IDN2 -IDN -PCRE2 default-hierarchy=hybrid)
[ 1.685536] Unable to handle kernel paging request at virtual
address 9ff00c15
[ 1.686160] Oops [#1]
[ 1.686409] Modules linked in:
[ 1.686826] CPU: 0 PID: 1 Comm: systemd Not tainted 5.3.0-rc4 #1
[ 1.687388] sepc: c05d2f74 ra : c04bd60c sp : df04fce0
[ 1.687817] gp : c07af4a8 tp : df050000 t0 : 000000fc
[ 1.688329] t1 : 00000002 t2 : 000003ef s0 : df04fcf0
[ 1.688763] s1 : df7090f8 a0 : 9ff00c15 a1 : c072f4a8
[ 1.689186] a2 : 00000000 a3 : 00000001 a4 : 00000001
[ 1.689587] a5 : df6f8138 a6 : 0000002f a7 : de62a000
[ 1.689970] s2 : c072f4a8 s3 : 00000000 s4 : 00000000
[ 1.690355] s5 : c07b1000 s6 : 00400cc0 s7 : 00000400
[ 1.690732] s8 : de496018 s9 : 00000000 s10: fffff000
[ 1.691114] s11: de496030 t3 : de62b000 t4 : 00000000
[ 1.691491] t5 : 00000000 t6 : 00000080
[ 1.691797] sstatus: 00000100 sbadaddr: 9ff00c15 scause: 0000000d
[ 1.692861] ---[ end trace 7aed3616cacc20ea ]---
[ 1.695358] Kernel panic - not syncing: Attempted to kill init!
exitcode=0x0000000b
[ 1.696158] ---[ end Kernel panic - not syncing: Attempted to kill
init! exitcode=0x0000000b ]---

>
> If so, this is probably a different bug. Most likely the TLB
> flushing
> issue.

I'm not sure. I have tried with Atish's OpenSBI and kernel patches but
that didn't help.

Reverting just this patch does fully fix the problem though.

Alistair

>
>
> - Paul

2019-08-16 01:07:51

by Anup Patel

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] RISC-V: Setup initial page tables in two stages

On Fri, Aug 16, 2019 at 12:27 AM Alistair Francis
<[email protected]> wrote:
>
> On Wed, 2019-07-10 at 17:05 -0700, Paul Walmsley wrote:
> > On Fri, 7 Jun 2019, Anup Patel wrote:
> >
> > > Currently, the setup_vm() does initial page table setup in one-shot
> > > very early before enabling MMU. Due to this, the setup_vm() has to
> > > map
> > > all possible kernel virtual addresses since it does not know size
> > > and
> > > location of RAM. This means we have kernel mappings for non-
> > > existent
> > > RAM and any buggy driver (or kernel) code doing out-of-bound access
> > > to RAM will not fault and cause non-deterministic behaviour.
> > >
> > > Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
> > > RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
> > > MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
> > > 516 KB) of memory for initial page tables which is never freed. The
> > > memory required for initial page tables will further increase if
> > > we choose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000).
> > >
> > > This patch implements two-staged initial page table setup, as
> > > follows:
> > > 1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
> > > an early page table (i.e. early_pg_dir). The early_pg_dir will be
> > > used
> > > only by boot HART so it can be freed as-part of init memory free-
> > > up.
> > > 2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
> > > banks in the final page table (i.e. swapper_pg_dir). The boot HART
> > > will start using swapper_pg_dir at the end of setup_vm_final(). All
> > > non-boot HARTs directly use the swapper_pg_dir created by boot
> > > HART.
> > >
> > > We have the following advantages with this new approach:
> > > 1. Kernel mappings for non-existent RAM don't exist anymore.
> > > 2. Memory consumed by initial page tables is now independent of the
> > > chosen PAGE_OFFSET.
> > > 3. Memory consumed by initial page tables on RV64 system is 2 pages
> > > (i.e. 8 KB) which is significantly reduced, and these pages will be
> > > freed as-part of the init memory free-up.
> > >
> > > The patch also provides a foundation for implementing strict kernel
> > > mappings where we protect kernel text and rodata using PTE
> > > permissions.
> > >
> > > Suggested-by: Mike Rapoport <[email protected]>
> > > Signed-off-by: Anup Patel <[email protected]>
> >
> > Thanks, updated to apply and to fix a checkpatch warning, and
> > queued.
> >
> > This may not make it in for v5.3-rc1; if not, we'll submit it later.
>
> I'm seeing this failure on RV32 which I bisected to this patch:
>
> [ 1.820461] systemd[1]: systemd 242-19-gdb2e367+ running in system
> mode. (-PAM -AUDIT -SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP
> -LIBCRYPTSETUP -GCRYPT -GNUTLS +ACL +XZ -LZ4 -SECCOMP +BLKID -ELFUTILS
> +KMOD -IDN2 -IDN -PCRE2 default-hierarchy=hybrid)
> [ 1.824320] Unable to handle kernel paging request at virtual
> address 9ff00c15
> [ 1.824973] Oops [#1]
> [ 1.825162] Modules linked in:
> [ 1.825536] CPU: 0 PID: 1 Comm: systemd Not tainted 5.2.0-rc7 #1
> [ 1.826039] sepc: c05c3c78 ra : c04b5a74 sp : df047ce0
> [ 1.826514] gp : c07a1038 tp : df04c000 t0 : 000000fc
> [ 1.826919] t1 : 00000002 t2 : 000003ef s0 : df047cf0
> [ 1.827322] s1 : df7090f8 a0 : 9ff00c15 a1 : c072166c
> [ 1.827723] a2 : 00000000 a3 : 00000001 a4 : 00000001
> [ 1.828104] a5 : df6f8138 a6 : 0000002f a7 : de62a000
> [ 1.828534] s2 : c072166c s3 : 00000000 s4 : 00000000
> [ 1.828931] s5 : c07a2000 s6 : 00400cc0 s7 : 00000400
> [ 1.829319] s8 : de491018 s9 : 00000000 s10: fffff000
> [ 1.829702] s11: de491030 t3 : de62b000 t4 : 00000000
> [ 1.830090] t5 : 00000000 t6 : 00000080
> [ 1.830392] sstatus: 00000100 sbadaddr: 9ff00c15 scause: 0000000d
> [ 1.831616] ---[ end trace 49a926a1a5300c00 ]---
> [ 1.835776] Kernel panic - not syncing: Attempted to kill init!
> exitcode=0x0000000b
> [ 1.836575] ---[ end Kernel panic - not syncing: Attempted to kill
> init! exitcode=0x0000000b ]---
>
> Does anyone else see this?
>
> A simple revert of this patch on 5.3-rc4 fixes the issue for me.

It looks like this patch is exposing some other bug in the Linux RISC-V
32-bit kernel.

Reverting this patch would only hide the actual issue, because
previously we were mapping all possible kernel virtual addresses,
even for non-existent RAM (i.e. addresses past the end of RAM).

Let me debug this more.

Regards,
Anup

>
> Alistair
>
> >
> >
> > - Paul
> >
> > _______________________________________________
> > linux-riscv mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv