Received: by 2002:a25:ef43:0:0:0:0:0 with SMTP id w3csp14593ybm; Tue, 26 May 2020 09:35:21 -0700 (PDT) X-Google-Smtp-Source: ABdhPJxlOStbbak/iOqXndpXNBj9GwHC5/FvYe5It3Q+lb6wAmqGYzoUaHrfRdLwwCH7LjvUGUT2 X-Received: by 2002:a50:9a86:: with SMTP id p6mr20838156edb.153.1590510921411; Tue, 26 May 2020 09:35:21 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1590510921; cv=none; d=google.com; s=arc-20160816; b=BbulT633NybYQPLTHLkt13Bzyvq9dlov2Qy4AF9ZyospCRDtLCLmRAxd7MRmQE6mFQ 8Kfya/SQjUuNZZ1TAgyQ0Dn6nL0pk1jAES64tQfhz1Kf3HPcE8bTXqkOZZwny/aN/ne3 8aG3TOJK24NhtAL5veG+x3bivbXq3+THEJ5tLYwzK0yIYyn9qMnCfNr0kNcBSNTnzZwS qFxb7/YZxGORtDQv70W98evcuGIp4WNnzb5aGRqotaQjtx02mrIRBTPeyRN4HEDDPG09 zqXmzmLSJt5fB3+NaWEcIvLmZTDwH1XT871KasaeRg/02aW0Pq/SzrQa3/GvERKVru/j WWSQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:sender:content-language :content-transfer-encoding:in-reply-to:mime-version:user-agent:date :message-id:from:references:cc:to:subject; bh=eX6Omh+fgZilHaeKQNyIQ8bBfyLuoGTAjl1KegqrwRE=; b=j0f8j8cBI6VN73vWktZoI0tisOzca10GwvhRGBGvGN7ejVKlyGxRsE5ulktBwda3EG r5urHZTex09fVZtCUDXRQb+nRZNpyPgzSUNB1pWeJonOL9bcAH7rqlYMXidlq8wYLMCt ncNTrHC+kP+MvYNJCC2UTNvQt25Al9GNR9oy93xrla/0kXqwjKmQw2cQ+sMTwuHh05NE PGzrhkze2FDJODJ2j8duErdmyvcvw1YOsix9/4mZa/jLJla7EHVg5kwwCIW7hFmYysn4 mKKO8s0lGHYv6IP19amXBvk/e7naI0zS8pjd0nq9GGOjCPdumtRSb+LAQCc2Xfx1+rU8 VBaQ== ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org Return-Path: Received: from vger.kernel.org (vger.kernel.org. 
[23.128.96.18]) by mx.google.com with ESMTP id m1si167665ejk.303.2020.05.26.09.34.57; Tue, 26 May 2020 09:35:21 -0700 (PDT) Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) client-ip=23.128.96.18; Authentication-Results: mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388682AbgEZQa0 (ORCPT + 99 others); Tue, 26 May 2020 12:30:26 -0400 Received: from relay12.mail.gandi.net ([217.70.178.232]:47575 "EHLO relay12.mail.gandi.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388672AbgEZQa0 (ORCPT ); Tue, 26 May 2020 12:30:26 -0400 Received: from [192.168.1.14] (lfbn-gre-1-325-105.w90-112.abo.wanadoo.fr [90.112.45.105]) (Authenticated sender: alex@ghiti.fr) by relay12.mail.gandi.net (Postfix) with ESMTPSA id A46F5200002; Tue, 26 May 2020 16:30:20 +0000 (UTC) Subject: Re: [PATCH 5/8] riscv: Implement sv48 support To: Anup Patel Cc: Paul Walmsley , Palmer Dabbelt , Zong Li , Christoph Hellwig , linux-riscv , "linux-kernel@vger.kernel.org List" References: <20200524091008.25587-1-alex@ghiti.fr> <20200524091008.25587-6-alex@ghiti.fr> From: Alex Ghiti Message-ID: <9bb64e7f-5308-75fc-d589-17680633038c@ghiti.fr> Date: Tue, 26 May 2020 12:30:20 -0400 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.8.1 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 8bit Content-Language: fr Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Le 5/25/20 à 2:45 AM, Anup Patel a écrit : > On Sun, May 24, 2020 at 2:45 PM Alexandre Ghiti wrote: >> By adding a new 4th level of page table, give the possibility to 64bit >> kernel to address 2^48 bytes of 
virtual address: in practice, that roughly >> offers ~160TB of virtual address space to userspace and allows up to 64TB >> of physical memory. >> >> If the underlying hardware does not support sv48, we will automatically >> fallback to a standard 3-level page table by folding the new PUD level into >> PGDIR level. In order to detect HW capabilities at runtime, we >> use SATP feature that ignores writes with an unsupported mode. >> >> Signed-off-by: Alexandre Ghiti >> --- >> arch/riscv/Kconfig | 6 +- >> arch/riscv/include/asm/csr.h | 3 +- >> arch/riscv/include/asm/fixmap.h | 1 + >> arch/riscv/include/asm/page.h | 15 +++ >> arch/riscv/include/asm/pgalloc.h | 36 +++++++ >> arch/riscv/include/asm/pgtable-64.h | 97 ++++++++++++++++- >> arch/riscv/include/asm/pgtable.h | 9 +- >> arch/riscv/kernel/head.S | 3 +- >> arch/riscv/mm/context.c | 4 +- >> arch/riscv/mm/init.c | 159 +++++++++++++++++++++++++--- >> 10 files changed, 309 insertions(+), 24 deletions(-) >> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig >> index e167f16131f4..3f73f60e9732 100644 >> --- a/arch/riscv/Kconfig >> +++ b/arch/riscv/Kconfig >> @@ -68,6 +68,7 @@ config RISCV >> select ARCH_HAS_GCOV_PROFILE_ALL >> select HAVE_COPY_THREAD_TLS >> select HAVE_ARCH_KASAN if MMU && 64BIT >> + select RELOCATABLE if 64BIT >> >> config ARCH_MMAP_RND_BITS_MIN >> default 18 if 64BIT >> @@ -106,7 +107,7 @@ config PAGE_OFFSET >> default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB >> default 0x80000000 if 64BIT && !MMU >> default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB >> - default 0xffffffe000000000 if 64BIT && !MAXPHYSMEM_2GB >> + default 0xffffc00000000000 if 64BIT && !MAXPHYSMEM_2GB >> >> config ARCH_FLATMEM_ENABLE >> def_bool y >> @@ -155,8 +156,11 @@ config GENERIC_HWEIGHT >> config FIX_EARLYCON_MEM >> def_bool MMU >> >> +# On a 64BIT relocatable kernel, the 4-level page table is at runtime folded >> +# on a 3-level page table when sv48 is not supported. 
>> config PGTABLE_LEVELS >> int >> + default 4 if 64BIT && RELOCATABLE >> default 3 if 64BIT >> default 2 >> >> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h >> index cec462e198ce..d41536c3f8d4 100644 >> --- a/arch/riscv/include/asm/csr.h >> +++ b/arch/riscv/include/asm/csr.h >> @@ -40,11 +40,10 @@ >> #ifndef CONFIG_64BIT >> #define SATP_PPN _AC(0x003FFFFF, UL) >> #define SATP_MODE_32 _AC(0x80000000, UL) >> -#define SATP_MODE SATP_MODE_32 >> #else >> #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) >> #define SATP_MODE_39 _AC(0x8000000000000000, UL) >> -#define SATP_MODE SATP_MODE_39 >> +#define SATP_MODE_48 _AC(0x9000000000000000, UL) >> #endif >> >> /* Exception cause high bit - is an interrupt if set */ >> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h >> index 2368d49eb4ef..d891cf9c73c5 100644 >> --- a/arch/riscv/include/asm/fixmap.h >> +++ b/arch/riscv/include/asm/fixmap.h >> @@ -27,6 +27,7 @@ enum fixed_addresses { >> FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, >> FIX_PTE, >> FIX_PMD, >> + FIX_PUD, >> FIX_TEXT_POKE1, >> FIX_TEXT_POKE0, >> FIX_EARLYCON_MEM_BASE, >> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h >> index 48bb09b6a9b7..5e77fe7f0d6d 100644 >> --- a/arch/riscv/include/asm/page.h >> +++ b/arch/riscv/include/asm/page.h >> @@ -31,7 +31,19 @@ >> * When not using MMU this corresponds to the first free page in >> * physical memory (aligned on a page boundary). >> */ >> +#ifdef CONFIG_RELOCATABLE >> +#define PAGE_OFFSET __page_offset >> + >> +#ifdef CONFIG_64BIT >> +/* >> + * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so >> + * define the PAGE_OFFSET value for SV39. 
>> + */ >> +#define PAGE_OFFSET_L3 0xffffffe000000000 >> +#endif /* CONFIG_64BIT */ >> +#else >> #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) >> +#endif /* CONFIG_RELOCATABLE */ >> >> #define KERN_VIRT_SIZE (-PAGE_OFFSET) >> >> @@ -102,6 +114,9 @@ extern unsigned long pfn_base; >> extern unsigned long max_low_pfn; >> extern unsigned long min_low_pfn; >> extern unsigned long kernel_virt_addr; >> +#ifdef CONFIG_RELOCATABLE >> +extern unsigned long __page_offset; >> +#endif >> >> #define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) >> #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset) >> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h >> index 3f601ee8233f..540eaa5a8658 100644 >> --- a/arch/riscv/include/asm/pgalloc.h >> +++ b/arch/riscv/include/asm/pgalloc.h >> @@ -36,6 +36,42 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) >> >> set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); >> } >> + >> +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) >> +{ >> + if (pgtable_l4_enabled) { >> + unsigned long pfn = virt_to_pfn(pud); >> + >> + set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); >> + } >> +} >> + >> +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, >> + pud_t *pud) >> +{ >> + if (pgtable_l4_enabled) { >> + unsigned long pfn = virt_to_pfn(pud); >> + >> + set_p4d_safe(p4d, >> + __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); >> + } >> +} >> + >> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) >> +{ >> + if (pgtable_l4_enabled) >> + return (pud_t *)__get_free_page( >> + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO); >> + return NULL; >> +} >> + >> +static inline void pud_free(struct mm_struct *mm, pud_t *pud) >> +{ >> + if (pgtable_l4_enabled) >> + free_page((unsigned long)pud); >> +} >> + >> +#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) 
>> #endif /* __PAGETABLE_PMD_FOLDED */ >> >> #define pmd_pgtable(pmd) pmd_page(pmd) >> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h >> index b15f70a1fdfa..c84c31fbf8da 100644 >> --- a/arch/riscv/include/asm/pgtable-64.h >> +++ b/arch/riscv/include/asm/pgtable-64.h >> @@ -8,16 +8,32 @@ >> >> #include >> >> -#define PGDIR_SHIFT 30 >> +extern bool pgtable_l4_enabled; >> + >> +#define PGDIR_SHIFT (pgtable_l4_enabled ? 39 : 30) >> /* Size of region mapped by a page global directory */ >> #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) >> #define PGDIR_MASK (~(PGDIR_SIZE - 1)) >> >> +/* pud is folded into pgd in case of 3-level page table */ >> +#define PUD_SHIFT 30 >> +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) >> +#define PUD_MASK (~(PUD_SIZE - 1)) >> + >> #define PMD_SHIFT 21 >> /* Size of region mapped by a page middle directory */ >> #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) >> #define PMD_MASK (~(PMD_SIZE - 1)) >> >> +/* Page Upper Directory entry */ >> +typedef struct { >> + unsigned long pud; >> +} pud_t; >> + >> +#define pud_val(x) ((x).pud) >> +#define __pud(x) ((pud_t) { (x) }) >> +#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t)) >> + >> /* Page Middle Directory entry */ >> typedef struct { >> unsigned long pmd; >> @@ -60,6 +76,16 @@ static inline void pud_clear(pud_t *pudp) >> set_pud(pudp, __pud(0)); >> } >> >> +static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) >> +{ >> + return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); >> +} >> + >> +static inline unsigned long _pud_pfn(pud_t pud) >> +{ >> + return pud_val(pud) >> _PAGE_PFN_SHIFT; >> +} >> + >> static inline unsigned long pud_page_vaddr(pud_t pud) >> { >> return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); >> @@ -70,6 +96,15 @@ static inline struct page *pud_page(pud_t pud) >> return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); >> } >> >> +#define mm_pud_folded mm_pud_folded >> +static inline bool mm_pud_folded(struct mm_struct 
*mm) >> +{ >> + if (pgtable_l4_enabled) >> + return false; >> + >> + return true; >> +} >> + >> #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) >> >> static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) >> @@ -90,4 +125,64 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) >> #define pmd_ERROR(e) \ >> pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) >> >> +#define pud_ERROR(e) \ >> + pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) >> + >> +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) >> +{ >> + if (pgtable_l4_enabled) >> + *p4dp = p4d; >> + else >> + set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); >> +} >> + >> +static inline int p4d_none(p4d_t p4d) >> +{ >> + if (pgtable_l4_enabled) >> + return (p4d_val(p4d) == 0); >> + >> + return 0; >> +} >> + >> +static inline int p4d_present(p4d_t p4d) >> +{ >> + if (pgtable_l4_enabled) >> + return (p4d_val(p4d) & _PAGE_PRESENT); >> + >> + return 1; >> +} >> + >> +static inline int p4d_bad(p4d_t p4d) >> +{ >> + if (pgtable_l4_enabled) >> + return !p4d_present(p4d); >> + >> + return 0; >> +} >> + >> +static inline void p4d_clear(p4d_t *p4d) >> +{ >> + if (pgtable_l4_enabled) >> + set_p4d(p4d, __p4d(0)); >> +} >> + >> +static inline unsigned long p4d_page_vaddr(p4d_t p4d) >> +{ >> + if (pgtable_l4_enabled) >> + return (unsigned long)pfn_to_virt( >> + p4d_val(p4d) >> _PAGE_PFN_SHIFT); >> + >> + return pud_page_vaddr((pud_t) { p4d_val(p4d) }); >> +} >> + >> +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) >> + >> +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) >> +{ >> + if (pgtable_l4_enabled) >> + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); >> + >> + return (pud_t *)p4d; >> +} >> + >> #endif /* _ASM_RISCV_PGTABLE_64_H */ >> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h >> index 8e96315b3366..b8a8ba69d0a2 100644 >> --- a/arch/riscv/include/asm/pgtable.h >> +++ 
b/arch/riscv/include/asm/pgtable.h >> @@ -20,12 +20,14 @@ >> * the kernel. >> */ >> #define KERNEL_VIRT_ADDR (VMALLOC_END - SZ_2G + 1) >> -#define KERNEL_LINK_ADDR KERNEL_VIRT_ADDR >> +#define KERNEL_LINK_ADDR (VMALLOC_LINK_END - SZ_2G + 1) >> >> #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) >> #define VMALLOC_END (PAGE_OFFSET - 1) >> #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) >> >> +#define VMALLOC_LINK_END (_AC(CONFIG_PAGE_OFFSET, UL) - 1) >> + >> #define BPF_JIT_REGION_SIZE (SZ_128M) >> #define BPF_JIT_REGION_START (kernel_virt_addr) >> #define BPF_JIT_REGION_END (kernel_virt_addr + BPF_JIT_REGION_SIZE) >> @@ -67,8 +69,7 @@ >> >> #ifndef __ASSEMBLY__ >> >> -/* Page Upper Directory not used in RISC-V */ >> -#include >> +#include >> #include >> #include >> #include >> @@ -81,7 +82,7 @@ >> >> #ifdef CONFIG_MMU >> #ifdef CONFIG_64BIT >> -#define VA_BITS 39 >> +#define VA_BITS (pgtable_l4_enabled ? 48 : 39) >> #define PA_BITS 56 >> #else >> #define VA_BITS 32 >> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S >> index 8f5bb7731327..0632c4834c68 100644 >> --- a/arch/riscv/kernel/head.S >> +++ b/arch/riscv/kernel/head.S >> @@ -62,7 +62,8 @@ relocate: >> >> /* Compute satp for kernel page tables, but don't load it yet */ >> srl a2, a0, PAGE_SHIFT >> - li a1, SATP_MODE >> + la a1, satp_mode >> + REG_L a1, 0(a1) >> or a2, a2, a1 >> >> /* >> diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c >> index 613ec81a8979..6830504f8b11 100644 >> --- a/arch/riscv/mm/context.c >> +++ b/arch/riscv/mm/context.c >> @@ -9,6 +9,8 @@ >> #include >> #include >> >> +extern u64 satp_mode; > Please move this to asm/pgtable.h next to "extern void *dtb_early_va". > > Same thing can be done for "pgtable_l4_enabled" to help PATCH7. > > I forgot to mention this in previous emails. Ok, I'll do that in v2 too, thanks. Anup, do you have time to take a look at the relocatable series I posted earlier? 
As sv48 support depends on that, it would be nice to have your review too. Thanks, Alex > > Regards, > Anup > > > >> + >> /* >> * When necessary, performs a deferred icache flush for the given MM context, >> * on the local CPU. RISC-V has no direct mechanism for instruction cache >> @@ -59,7 +61,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, >> cpumask_set_cpu(cpu, mm_cpumask(next)); >> >> #ifdef CONFIG_MMU >> - csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE); >> + csr_write(CSR_SATP, virt_to_pfn(next->pgd) | satp_mode); >> local_flush_tlb_all(); >> #endif >> >> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c >> index 5782cae58ac2..bad8da099ff6 100644 >> --- a/arch/riscv/mm/init.c >> +++ b/arch/riscv/mm/init.c >> @@ -25,8 +25,23 @@ >> >> #include "../kernel/head.h" >> >> -unsigned long kernel_virt_addr = KERNEL_VIRT_ADDR; >> +#ifdef CONFIG_64BIT >> +u64 satp_mode = IS_ENABLED(CONFIG_MAXPHYSMEM_2GB) ? >> + SATP_MODE_39 : SATP_MODE_48; >> +bool pgtable_l4_enabled = IS_ENABLED(CONFIG_MAXPHYSMEM_2GB) ? 
false : true; >> +#else >> +u64 satp_mode = SATP_MODE_32; >> +bool pgtable_l4_enabled; >> +#endif >> +EXPORT_SYMBOL(pgtable_l4_enabled); >> +EXPORT_SYMBOL(satp_mode); >> + >> +unsigned long kernel_virt_addr; >> EXPORT_SYMBOL(kernel_virt_addr); >> +#ifdef CONFIG_RELOCATABLE >> +unsigned long __page_offset = _AC(CONFIG_PAGE_OFFSET, UL); >> +EXPORT_SYMBOL(__page_offset); >> +#endif >> >> unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] >> __page_aligned_bss; >> @@ -254,9 +269,12 @@ static void __init create_pte_mapping(pte_t *ptep, >> >> #ifndef __PAGETABLE_PMD_FOLDED >> >> +pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; >> pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; >> +pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; >> pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; >> pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); >> +pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); >> >> static pmd_t *__init get_pmd_virt(phys_addr_t pa) >> { >> @@ -273,7 +291,8 @@ static phys_addr_t __init alloc_pmd(uintptr_t va) >> if (mmu_enabled) >> return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); >> >> - BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT); >> + /* Only one PMD is available for early mapping */ >> + BUG_ON((va - kernel_virt_addr) >> PUD_SHIFT); >> >> return (uintptr_t)early_pmd; >> } >> @@ -305,19 +324,70 @@ static void __init create_pmd_mapping(pmd_t *pmdp, >> create_pte_mapping(ptep, va, pa, sz, prot); >> } >> >> -#define pgd_next_t pmd_t >> -#define alloc_pgd_next(__va) alloc_pmd(__va) >> -#define get_pgd_next_virt(__pa) get_pmd_virt(__pa) >> +static pud_t *__init get_pud_virt(phys_addr_t pa) >> +{ >> + if (mmu_enabled) { >> + clear_fixmap(FIX_PUD); >> + return (pud_t *)set_fixmap_offset(FIX_PUD, pa); >> + } else { >> + return (pud_t *)((uintptr_t)pa); >> + } >> +} >> + >> +static phys_addr_t __init alloc_pud(uintptr_t va) >> +{ >> + if (mmu_enabled) >> + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); >> + 
>> + /* Only one PUD is available for early mapping */ >> + BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT); >> + >> + return (uintptr_t)early_pud; >> +} >> + >> +static void __init create_pud_mapping(pud_t *pudp, >> + uintptr_t va, phys_addr_t pa, >> + phys_addr_t sz, pgprot_t prot) >> +{ >> + pmd_t *nextp; >> + phys_addr_t next_phys; >> + uintptr_t pud_index = pud_index(va); >> + >> + if (sz == PUD_SIZE) { >> + if (pud_val(pudp[pud_index]) == 0) >> + pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot); >> + return; >> + } >> + >> + if (pud_val(pudp[pud_index]) == 0) { >> + next_phys = alloc_pmd(va); >> + pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); >> + nextp = get_pmd_virt(next_phys); >> + memset(nextp, 0, PAGE_SIZE); >> + } else { >> + next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); >> + nextp = get_pmd_virt(next_phys); >> + } >> + >> + create_pmd_mapping(nextp, va, pa, sz, prot); >> +} >> + >> +#define pgd_next_t pud_t >> +#define alloc_pgd_next(__va) alloc_pud(__va) >> +#define get_pgd_next_virt(__pa) get_pud_virt(__pa) >> #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ >> - create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) >> -#define fixmap_pgd_next fixmap_pmd >> + create_pud_mapping(__nextp, __va, __pa, __sz, __prot) >> +#define fixmap_pgd_next (pgtable_l4_enabled ? \ >> + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd) >> +#define trampoline_pgd_next (pgtable_l4_enabled ? 
\ >> + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd) >> #else >> #define pgd_next_t pte_t >> #define alloc_pgd_next(__va) alloc_pte(__va) >> #define get_pgd_next_virt(__pa) get_pte_virt(__pa) >> #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ >> create_pte_mapping(__nextp, __va, __pa, __sz, __prot) >> -#define fixmap_pgd_next fixmap_pte >> +#define fixmap_pgd_next ((uintptr_t)fixmap_pte) >> #endif >> >> static void __init create_pgd_mapping(pgd_t *pgdp, >> @@ -328,6 +398,13 @@ static void __init create_pgd_mapping(pgd_t *pgdp, >> phys_addr_t next_phys; >> uintptr_t pgd_index = pgd_index(va); >> >> +#ifndef __PAGETABLE_PMD_FOLDED >> + if (!pgtable_l4_enabled) { >> + create_pud_mapping((pud_t *)pgdp, va, pa, sz, prot); >> + return; >> + } >> +#endif >> + >> if (sz == PGDIR_SIZE) { >> if (pgd_val(pgdp[pgd_index]) == 0) >> pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot); >> @@ -419,6 +496,47 @@ void __init relocate_kernel(uintptr_t load_pa) >> } >> } >> >> +#if defined(CONFIG_64BIT) && !defined(CONFIG_MAXPHYSMEM_2GB) >> +void disable_pgtable_l4(void) >> +{ >> + pgtable_l4_enabled = false; >> + __page_offset = PAGE_OFFSET_L3; >> + satp_mode = SATP_MODE_39; >> +} >> + >> +/* There is a simple way to determine if 4-level is supported by the >> + * underlying hardware: establish 1:1 mapping in 4-level page table mode >> + * then read SATP to see if the configuration was taken into account >> + * meaning sv48 is supported. 
>> + */ >> +asmlinkage __init void set_satp_mode(uintptr_t load_pa) >> +{ >> + u64 identity_satp, hw_satp; >> + int cpus_node; >> + >> + create_pgd_mapping(early_pg_dir, load_pa, (uintptr_t)early_pud, >> + PGDIR_SIZE, PAGE_TABLE); >> + create_pud_mapping(early_pud, load_pa, (uintptr_t)early_pmd, >> + PUD_SIZE, PAGE_TABLE); >> + create_pmd_mapping(early_pmd, load_pa, load_pa, >> + PMD_SIZE, PAGE_KERNEL_EXEC); >> + >> + identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; >> + local_flush_tlb_all(); >> + csr_write(CSR_SATP, identity_satp); >> + >> + hw_satp = csr_read(CSR_SATP); >> + csr_write(CSR_SATP, 0ULL); >> + local_flush_tlb_all(); >> + >> + if (hw_satp != identity_satp) >> + disable_pgtable_l4(); >> + >> + memset(early_pg_dir, 0, PAGE_SIZE); >> + memset(early_pud, 0, PAGE_SIZE); >> + memset(early_pmd, 0, PAGE_SIZE); >> +} >> +#endif >> #endif >> >> static uintptr_t load_pa, load_sz; >> @@ -442,9 +560,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) >> load_pa = (uintptr_t)(&_start); >> load_sz = (uintptr_t)(&_end) - load_pa; >> >> +#if defined(CONFIG_64BIT) && !defined(CONFIG_MAXPHYSMEM_2GB) >> + set_satp_mode(load_pa); >> +#endif >> + >> + kernel_virt_addr = KERNEL_VIRT_ADDR; >> + >> va_pa_offset = PAGE_OFFSET - load_pa; >> va_kernel_pa_offset = kernel_virt_addr - load_pa; >> - >> pfn_base = PFN_DOWN(load_pa); >> >> #ifdef CONFIG_RELOCATABLE >> @@ -473,15 +596,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) >> >> /* Setup early PGD for fixmap */ >> create_pgd_mapping(early_pg_dir, FIXADDR_START, >> - (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); >> + fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); >> >> #ifndef __PAGETABLE_PMD_FOLDED >> - /* Setup fixmap PMD */ >> + /* Setup fixmap PUD and PMD */ >> + if (pgtable_l4_enabled) >> + create_pud_mapping(fixmap_pud, FIXADDR_START, >> + (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE); >> create_pmd_mapping(fixmap_pmd, FIXADDR_START, >> (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); >> + >> /* 
Setup trampoline PGD and PMD */ >> create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, >> - (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); >> + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); >> + if (pgtable_l4_enabled) >> + create_pud_mapping(trampoline_pud, kernel_virt_addr, >> + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); >> create_pmd_mapping(trampoline_pmd, kernel_virt_addr, >> load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); >> #else >> @@ -558,12 +688,13 @@ static void __init setup_vm_final(void) >> >> vm_area_add_early(&vm_kernel); >> >> - /* Clear fixmap PTE and PMD mappings */ >> + /* Clear fixmap page table mappings */ >> clear_fixmap(FIX_PTE); >> clear_fixmap(FIX_PMD); >> + clear_fixmap(FIX_PUD); >> >> /* Move to swapper page table */ >> - csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); >> + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); >> local_flush_tlb_all(); >> } >> >> -- >> 2.20.1 >>