2020-07-29 23:40:37

by Atish Patra

Subject: [RFT PATCH v4 3/9] RISC-V: Implement late mapping page table allocation functions

Currently, page table setup is done during setup_vm_final where fixmap can
be used to create the temporary mappings. The physical frame is allocated
from the memblock_alloc_* functions. However, this won't work if a page table
mapping needs to be created for a different mm context (i.e. the efi mm) at
a later point in time.

Use the generic kernel page allocation functions & macros for any mapping
created after setup_vm_final.

Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/mm/init.c | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 68c608a0e91f..cba03fec08c1 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -212,6 +212,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
static bool mmu_enabled;
+static bool late_mapping;

#define MAX_EARLY_MAPPING_SIZE SZ_128M

@@ -236,7 +237,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)

static pte_t *__init get_pte_virt(phys_addr_t pa)
{
- if (mmu_enabled) {
+ if (late_mapping)
+ return (pte_t *) __va(pa);
+ else if (mmu_enabled) {
clear_fixmap(FIX_PTE);
return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
} else {
@@ -246,13 +249,19 @@ static pte_t *__init get_pte_virt(phys_addr_t pa)

static phys_addr_t __init alloc_pte(uintptr_t va)
{
+ unsigned long vaddr;
/*
* We only create PMD or PGD early mappings so we
* should never reach here with MMU disabled.
*/
BUG_ON(!mmu_enabled);
-
- return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (late_mapping) {
+ vaddr = __get_free_page(GFP_KERNEL);
+ if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
+ BUG();
+ return __pa(vaddr);
+ } else
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static void __init create_pte_mapping(pte_t *ptep,
@@ -281,7 +290,9 @@ pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);

static pmd_t *__init get_pmd_virt(phys_addr_t pa)
{
- if (mmu_enabled) {
+ if (late_mapping)
+ return (pmd_t *) __va(pa);
+ else if (mmu_enabled) {
clear_fixmap(FIX_PMD);
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
} else {
@@ -292,8 +303,13 @@ static pmd_t *__init get_pmd_virt(phys_addr_t pa)
static phys_addr_t __init alloc_pmd(uintptr_t va)
{
uintptr_t pmd_num;
+ unsigned long vaddr;

- if (mmu_enabled)
+ if (late_mapping) {
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+ } else if (mmu_enabled)
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);

pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
@@ -533,6 +549,9 @@ static void __init setup_vm_final(void)
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
+
+ /* generic page allocation function must be used to setup page table */
+ late_mapping = true;
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
--
2.24.0


2020-07-30 09:17:58

by Mike Rapoport

Subject: Re: [RFT PATCH v4 3/9] RISC-V: Implement late mapping page table allocation functions

Hi,

On Wed, Jul 29, 2020 at 04:36:29PM -0700, Atish Patra wrote:
> Currently, page table setup is done during setup_vm_final where fixmap can
> be used to create the temporary mappings. The physical frame is allocated
> from the memblock_alloc_* functions. However, this won't work if a page table
> mapping needs to be created for a different mm context (i.e. the efi mm) at
> a later point in time.

TBH, I'm not very happy to see pte/pmd allocations, but I don't see a
way to reuse the existing routines in this case.

As a general wild idea, maybe it's worth using something like

struct pt_alloc_ops {
        pte_t *(*get_pte_virt)(phys_addr_t pa);
        phys_addr_t (*alloc_pte)(uintptr_t va);
        ...
};

and add implementations there for nommu, MMU early and MMU late.
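
Very roughly, the MMU-late pair could then look like this (an untested
sketch only meant to illustrate the idea; the *_late names and the exact
hook point are just placeholders):

static pte_t *get_pte_virt_late(phys_addr_t pa)
{
        /* the linear map is up by now, so no fixmap dance is needed */
        return (pte_t *)__va(pa);
}

static phys_addr_t alloc_pte_late(uintptr_t va)
{
        unsigned long vaddr = __get_free_page(GFP_KERNEL);

        /* return 0 on failure and let the caller decide what to do */
        return vaddr ? __pa(vaddr) : 0;
}

static struct pt_alloc_ops pt_ops;

/* e.g. at the end of setup_vm_final(): */
pt_ops.get_pte_virt = get_pte_virt_late;
pt_ops.alloc_pte = alloc_pte_late;

Then create_pte_mapping() and friends just call pt_ops.alloc_pte() /
pt_ops.get_pte_virt() and never need to know which boot stage they are
running in.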

Some more comments below.

> Use the generic kernel page allocation functions & macros for any mapping
> created after setup_vm_final.
>
> Signed-off-by: Atish Patra <[email protected]>
> ---
> arch/riscv/mm/init.c | 29 ++++++++++++++++++++++++-----
> 1 file changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 68c608a0e91f..cba03fec08c1 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -212,6 +212,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
> pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
> pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
> static bool mmu_enabled;
> +static bool late_mapping;
>
> #define MAX_EARLY_MAPPING_SIZE SZ_128M
>
> @@ -236,7 +237,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
>
> static pte_t *__init get_pte_virt(phys_addr_t pa)
> {
> - if (mmu_enabled) {
> + if (late_mapping)
> + return (pte_t *) __va(pa);
> + else if (mmu_enabled) {
> clear_fixmap(FIX_PTE);
> return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
> } else {
> @@ -246,13 +249,19 @@ static pte_t *__init get_pte_virt(phys_addr_t pa)
>
> static phys_addr_t __init alloc_pte(uintptr_t va)
> {
> + unsigned long vaddr;
> /*
> * We only create PMD or PGD early mappings so we
> * should never reach here with MMU disabled.
> */
> BUG_ON(!mmu_enabled);
> -
> - return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> + if (late_mapping) {
> + vaddr = __get_free_page(GFP_KERNEL);
> + if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
> + BUG();

Is BUG() here really necessary? If I understand correctly, this would be
used to enable mappings for EFI runtime services, so we probably want to
propagate the error to the caller and, if really necessary, BUG() there,
don't we?

> + return __pa(vaddr);
> + } else
> + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> }
>
> static void __init create_pte_mapping(pte_t *ptep,
> @@ -281,7 +290,9 @@ pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
>
> static pmd_t *__init get_pmd_virt(phys_addr_t pa)
> {
> - if (mmu_enabled) {
> + if (late_mapping)
> + return (pmd_t *) __va(pa);
> + else if (mmu_enabled) {
> clear_fixmap(FIX_PMD);
> return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
> } else {
> @@ -292,8 +303,13 @@ static pmd_t *__init get_pmd_virt(phys_addr_t pa)
> static phys_addr_t __init alloc_pmd(uintptr_t va)
> {
> uintptr_t pmd_num;
> + unsigned long vaddr;
>
> - if (mmu_enabled)
> + if (late_mapping) {
> + vaddr = __get_free_page(GFP_KERNEL);
> + BUG_ON(!vaddr);
> + return __pa(vaddr);

Does nommu also need to allocate a page for pmd?

> + } else if (mmu_enabled)
> return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
>
> pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
> @@ -533,6 +549,9 @@ static void __init setup_vm_final(void)
> /* Move to swapper page table */
> csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
> local_flush_tlb_all();
> +
> + /* generic page allocation function must be used to setup page table */
> + late_mapping = true;
> }
> #else
> asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> --
> 2.24.0
>

--
Sincerely yours,
Mike.

2020-08-03 00:57:00

by Atish Patra

Subject: Re: [RFT PATCH v4 3/9] RISC-V: Implement late mapping page table allocation functions

On Thu, Jul 30, 2020 at 2:14 AM Mike Rapoport <[email protected]> wrote:
>
> Hi,
>
> On Wed, Jul 29, 2020 at 04:36:29PM -0700, Atish Patra wrote:
> > Currently, page table setup is done during setup_vm_final where fixmap can
> > be used to create the temporary mappings. The physical frame is allocated
> > from the memblock_alloc_* functions. However, this won't work if a page table
> > mapping needs to be created for a different mm context (i.e. the efi mm) at
> > a later point in time.
>
> TBH, I'm not very happy to see pte/pmd allocations, but I don't see a
> way to reuse the existing routines in this case.
>
> As a general wild idea, maybe it's worth using something like
>
> struct pt_alloc_ops {
>         pte_t *(*get_pte_virt)(phys_addr_t pa);
>         phys_addr_t (*alloc_pte)(uintptr_t va);
>         ...
> };
>
> and add implementations there for nommu, MMU early and MMU late.
>

Yeah, that will be much cleaner, thanks. I will change it to use function
pointers instead of if/else blocks.

> Some more comments below.
>
> > Use the generic kernel page allocation functions & macros for any mapping
> > created after setup_vm_final.
> >
> > Signed-off-by: Atish Patra <[email protected]>
> > ---
> > arch/riscv/mm/init.c | 29 ++++++++++++++++++++++++-----
> > 1 file changed, 24 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index 68c608a0e91f..cba03fec08c1 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -212,6 +212,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
> > pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
> > pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
> > static bool mmu_enabled;
> > +static bool late_mapping;
> >
> > #define MAX_EARLY_MAPPING_SIZE SZ_128M
> >
> > @@ -236,7 +237,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
> >
> > static pte_t *__init get_pte_virt(phys_addr_t pa)
> > {
> > - if (mmu_enabled) {
> > + if (late_mapping)
> > + return (pte_t *) __va(pa);
> > + else if (mmu_enabled) {
> > clear_fixmap(FIX_PTE);
> > return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
> > } else {
> > @@ -246,13 +249,19 @@ static pte_t *__init get_pte_virt(phys_addr_t pa)
> >
> > static phys_addr_t __init alloc_pte(uintptr_t va)
> > {
> > + unsigned long vaddr;
> > /*
> > * We only create PMD or PGD early mappings so we
> > * should never reach here with MMU disabled.
> > */
> > BUG_ON(!mmu_enabled);
> > -
> > - return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> > + if (late_mapping) {
> > + vaddr = __get_free_page(GFP_KERNEL);
> > + if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
> > + BUG();
>
> Is BUG() here really necessary? If I understand correctly, this would be
> used to enable mappings for EFI runtime services, so we probably want to
> propagate the error to the caller and, if really necessary, BUG() there,
> don't we?
>

If __get_free_page fails here, something is seriously wrong and the system
should panic. But I agree with you that this should happen in the caller
rather than the callee. We would need to change the signature of each
create_*_mapping function in order to propagate the error to the caller.
Do you see any issues with that? As it will affect page table allocation
for all three cases, should we consolidate all the BUG() cases in the
caller only?
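
Something like this, perhaps (a rough, untested sketch; it assumes
alloc_pte()/alloc_pmd() return 0 on failure instead of calling BUG(),
and that create_pmd_mapping() keeps roughly its current structure):

        /* caller side, e.g. in create_pmd_mapping(): */
        pte_phys = alloc_pte(va);
        if (!pte_phys)
                return -ENOMEM;

with create_pgd_mapping()/create_pmd_mapping()/create_pte_mapping() all
returning int, so the EFI code can decide whether a failed mapping is
fatal while the early boot callers can still BUG() on error.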

> > + return __pa(vaddr);
> > + } else
> > + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> > }
> >
> > static void __init create_pte_mapping(pte_t *ptep,
> > @@ -281,7 +290,9 @@ pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
> >
> > static pmd_t *__init get_pmd_virt(phys_addr_t pa)
> > {
> > - if (mmu_enabled) {
> > + if (late_mapping)
> > + return (pmd_t *) __va(pa);
> > + else if (mmu_enabled) {
> > clear_fixmap(FIX_PMD);
> > return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
> > } else {
> > @@ -292,8 +303,13 @@ static pmd_t *__init get_pmd_virt(phys_addr_t pa)
> > static phys_addr_t __init alloc_pmd(uintptr_t va)
> > {
> > uintptr_t pmd_num;
> > + unsigned long vaddr;
> >
> > - if (mmu_enabled)
> > + if (late_mapping) {
> > + vaddr = __get_free_page(GFP_KERNEL);
> > + BUG_ON(!vaddr);
> > + return __pa(vaddr);
>
> Does nommu also need to allocate a page for pmd?
>

Not sure if I understand the question correctly. Before the MMU is enabled,
we use the statically allocated early_pmd.

> > + } else if (mmu_enabled)
> > return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> >
> > pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
> > @@ -533,6 +549,9 @@ static void __init setup_vm_final(void)
> > /* Move to swapper page table */
> > csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
> > local_flush_tlb_all();
> > +
> > + /* generic page allocation function must be used to setup page table */
> > + late_mapping = true;
> > }
> > #else
> > asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> > --
> > 2.24.0
> >
>
> --
> Sincerely yours,
> Mike.



--
Regards,
Atish