From: Anup Patel <[email protected]>
Currently, RISC-V reserves 1MB of fixmap memory for the device tree.
However, it maps only a single PMD (2MB) of space for fixmap, which leaves
less than 1MB for other kernel features such as early ioremap, which
requires fixmap as well. The fixmap size can be increased by another 2MB,
but that brings additional complexity and changes the virtual memory layout
as well. If some additional feature requires fixmap space in the future,
the fixmap would have to be moved again.
Technically, the DT doesn't need a fixmap, as the memory occupied by the DT
is only used during boot. Thus, the init memory section can be used for the
same purpose instead. This simplifies the fixmap implementation.
Signed-off-by: Anup Patel <[email protected]>
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/fixmap.h | 3 ---
arch/riscv/kernel/head.S | 1 -
arch/riscv/kernel/head.h | 2 --
arch/riscv/kernel/setup.c | 8 +++-----
arch/riscv/mm/init.c | 36 ++++++++++++++-------------------
5 files changed, 18 insertions(+), 32 deletions(-)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 1ff075a8dfc7..11613f38228a 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -22,9 +22,6 @@
*/
enum fixed_addresses {
FIX_HOLE,
-#define FIX_FDT_SIZE SZ_1M
- FIX_FDT_END,
- FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
FIX_TEXT_POKE1,
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index d0c5c316e9bb..eb123eda3663 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -250,7 +250,6 @@ clear_bss_done:
#endif
/* Start the kernel */
call soc_early_init
- call parse_dtb
tail start_kernel
.Lsecondary_start:
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index 105fb0496b24..b48dda3d04f6 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
extern void *__cpu_up_stack_pointer[];
extern void *__cpu_up_task_pointer[];
-void __init parse_dtb(void);
-
#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index f04373be54a6..8519a6d29857 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -49,8 +49,9 @@ atomic_t hart_lottery __section(.sdata);
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-void __init parse_dtb(void)
+static void __init parse_dtb(void)
{
+ /* Early scan of device tree from init memory */
if (early_init_dt_scan(dtb_early_va))
return;
@@ -63,6 +64,7 @@ void __init parse_dtb(void)
void __init setup_arch(char **cmdline_p)
{
+ parse_dtb();
init_mm.start_code = (unsigned long) _stext;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
@@ -74,11 +76,7 @@ void __init setup_arch(char **cmdline_p)
setup_bootmem();
paging_init();
-#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
-#else
- unflatten_device_tree();
-#endif
clint_init_boot_cpu();
#ifdef CONFIG_SWIOTLB
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 79e9d55bdf1a..5e9328fa635b 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -28,7 +28,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
-void *dtb_early_va;
static void __init zone_sizes_init(void)
{
@@ -141,8 +140,6 @@ static void __init setup_initrd(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
-static phys_addr_t dtb_early_pa __initdata;
-
void __init setup_bootmem(void)
{
struct memblock_region *reg;
@@ -182,13 +179,9 @@ void __init setup_bootmem(void)
setup_initrd();
#endif /* CONFIG_BLK_DEV_INITRD */
- /*
- * Avoid using early_init_fdt_reserve_self() since __pa() does
- * not work for DTB pointers that are fixmap addresses
- */
- memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
-
+ early_init_fdt_reserve_self();
early_init_fdt_scan_reserved_mem();
+
memblock_allow_resize();
memblock_dump_all();
@@ -208,6 +201,10 @@ EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
+#define DTB_EARLY_SIZE SZ_1M
+static char early_dtb[DTB_EARLY_SIZE] __initdata;
+void *dtb_early_va __initdata = early_dtb;
+
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -399,7 +396,8 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
- uintptr_t va, end_va;
+ int dtb_size;
+ uintptr_t va, pa, end_va;
uintptr_t load_pa = (uintptr_t)(&_start);
uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
@@ -448,17 +446,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
load_pa + (va - PAGE_OFFSET),
map_size, PAGE_KERNEL_EXEC);
- /* Create fixed mapping for early FDT parsing */
- end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
- for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
- create_pte_mapping(fixmap_pte, va,
- dtb_pa + (va - __fix_to_virt(FIX_FDT)),
- PAGE_SIZE, PAGE_KERNEL);
-
- /* Save pointer to DTB for early FDT parsing */
- dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
- /* Save physical address for memblock reservation */
- dtb_early_pa = dtb_pa;
+ /* Copy FDT to init memory for early scan */
+ pa = load_pa + ((unsigned long)dtb_early_va - PAGE_OFFSET);
+ dtb_size = fdt_totalsize((void *)dtb_pa);
+ dtb_size = (dtb_size > DTB_EARLY_SIZE) ? DTB_EARLY_SIZE : dtb_size;
+ memcpy((void *)pa, (void *)dtb_pa, dtb_size);
}
static void __init setup_vm_final(void)
@@ -505,6 +497,8 @@ static void __init setup_vm_final(void)
local_flush_tlb_all();
}
#else
+void *dtb_early_va __initdata;
+
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
#ifdef CONFIG_BUILTIN_DTB
--
2.24.0
On Fri, Jul 17, 2020 at 1:41 AM Atish Patra <[email protected]> wrote:
>
> From: Anup Patel <[email protected]>
>
> Currently, RISC-V reserves 1MB of fixmap memory for device tree. However,
> it maps only single PMD (2MB) space for fixmap which leaves only < 1MB space
> left for other kernel features such as early ioremap which requires fixmap
> as well. The fixmap size can be increased by another 2MB but it brings
> additional complexity and changes the virtual memory layout as well.
> If we require some additional feature requiring fixmap again, it has to be
> moved again.
>
> Technically, DT doesn't need a fixmap as the memory occupied by the DT is
> only used during boot. Thus, init memory section can be used for the same
> purpose as well. This simplifies fixmap implementation.
>
> Signed-off-by: Anup Patel <[email protected]>
> Signed-off-by: Atish Patra <[email protected]>
The patch seems fine for the moment, but I have one concern for the
long run:
> +#define DTB_EARLY_SIZE SZ_1M
> +static char early_dtb[DTB_EARLY_SIZE] __initdata;
Hardcoding the size in .bss seems slightly problematic both for
small and for big systems. On machines with very little memory,
this can lead to running out of free pages before .initdata gets freed,
and it increases the size of the uncompressed vmlinux file by quite
a bit.
On some systems, the 1MB limit may get too small. While most dtbs
would fall into the range between 10KB and 100KB, it can also be
much larger than that, e.g. when there are DT properties that include
blobs like device firmware that gets loaded into hardware by a kernel
driver.
Is there anything stopping you from parsing the FDT in its original
location without the extra copy before it gets unflattened?
Arnd
On Thu, Jul 16, 2020 at 11:32 PM Arnd Bergmann <[email protected]> wrote:
>
> On Fri, Jul 17, 2020 at 1:41 AM Atish Patra <[email protected]> wrote:
> >
> > From: Anup Patel <[email protected]>
> >
> > Currently, RISC-V reserves 1MB of fixmap memory for device tree. However,
> > it maps only single PMD (2MB) space for fixmap which leaves only < 1MB space
> > left for other kernel features such as early ioremap which requires fixmap
> > as well. The fixmap size can be increased by another 2MB but it brings
> > additional complexity and changes the virtual memory layout as well.
> > If we require some additional feature requiring fixmap again, it has to be
> > moved again.
> >
> > Technically, DT doesn't need a fixmap as the memory occupied by the DT is
> > only used during boot. Thus, init memory section can be used for the same
> > purpose as well. This simplifies fixmap implementation.
> >
> > Signed-off-by: Anup Patel <[email protected]>
> > Signed-off-by: Atish Patra <[email protected]>
>
> The patch seems fine for the moment, but I have one concern for the
> long run:
>
> > +#define DTB_EARLY_SIZE SZ_1M
> > +static char early_dtb[DTB_EARLY_SIZE] __initdata;
>
> Hardcoding the size in .bss seems slightly problematic both for
> small and for big systems. On machines with very little memory,
> this can lead to running out of free pages before .initdata gets freed,
> and it increases the size of the uncompressed vmlinux file by quite
> a bit.
>
> On some systems, the 1MB limit may get too small. While most dtbs
> would fall into the range between 10KB and 100KB, it can also be
> much larger than that, e.g. when there are DT properties that include
> blobs like device firmware that gets loaded into hardware by a kernel
> driver.
>
I was not aware that we can do such things. Is there a current example of that?
> Is there anything stopping you from parsing the FDT in its original
> location without the extra copy before it gets unflattened?
>
That's what the original code was doing. A fixmap entry was added to map
the original FDT location to a virtual address so that parse_dtb can
operate on a virtual address. But we can't map both the FDT and early
ioremap within a single PMD region (2MB). That's why we moved the DT
mapping out of the fixmap and into the .bss section. The other alternative
is to increase the fixmap space to 4MB, which seems more fragile.
> Arnd
--
Regards,
Atish
On Sat, Jul 18, 2020 at 3:05 AM Atish Patra <[email protected]> wrote:
> On Thu, Jul 16, 2020 at 11:32 PM Arnd Bergmann <[email protected]> wrote:
> > On Fri, Jul 17, 2020 at 1:41 AM Atish Patra <[email protected]> wrote:
> > > +#define DTB_EARLY_SIZE SZ_1M
> > > +static char early_dtb[DTB_EARLY_SIZE] __initdata;
> >
> > Hardcoding the size in .bss seems slightly problematic both for
> > small and for big systems. On machines with very little memory,
> > this can lead to running out of free pages before .initdata gets freed,
> > and it increases the size of the uncompressed vmlinux file by quite
> > a bit.
> >
> > On some systems, the 1MB limit may get too small. While most dtbs
> > would fall into the range between 10KB and 100KB, it can also be
> > much larger than that, e.g. when there are DT properties that include
> > blobs like device firmware that gets loaded into hardware by a kernel
> > driver.
> >
> I was not aware that we can do such things. Is there a current example of that ?
I worked on a product in the distant past where the host firmware
included the ethernet controller firmware as a DT property[1] to get around
restrictions on redistributing the blob in the linux-firmware package.
For the .dts files we distribute with the kernel, that would not make
sense, and I don't know of any current machines that do this in their
system firmware.
> > Is there anything stopping you from parsing the FDT in its original
> > location without the extra copy before it gets unflattened?
>
> That's what the original code was doing. A fixmap entry was added to
> map the original fdt
> location to a virtual so that parse_dtb can be operated on a virtual
> address. But we can't map
> both FDT & early ioremap within a single PMD region( 2MB ). That's why
> we removed the DT
> mapping from the fixmap to .bss section. The other alternate option is
> to increase the fixmap space to 4MB which seems more fragile.
Could the original location just be part of the regular linear mapping of all
RAM? I'm not too familiar with the early mapping code myself, so it may not
be possible, but that would be the most logical place where I'd put it.
Arnd
[1] drivers/net/ethernet/toshiba/spider_net.c
On Sat, Jul 18, 2020 at 2:24 AM Arnd Bergmann <[email protected]> wrote:
>
> On Sat, Jul 18, 2020 at 3:05 AM Atish Patra <[email protected]> wrote:
> > On Thu, Jul 16, 2020 at 11:32 PM Arnd Bergmann <[email protected]> wrote:
> > > On Fri, Jul 17, 2020 at 1:41 AM Atish Patra <[email protected]> wrote:
> > > > +#define DTB_EARLY_SIZE SZ_1M
> > > > +static char early_dtb[DTB_EARLY_SIZE] __initdata;
> > >
> > > Hardcoding the size in .bss seems slightly problematic both for
> > > small and for big systems. On machines with very little memory,
> > > this can lead to running out of free pages before .initdata gets freed,
> > > and it increases the size of the uncompressed vmlinux file by quite
> > > a bit.
> > >
> > > On some systems, the 1MB limit may get too small. While most dtbs
> > > would fall into the range between 10KB and 100KB, it can also be
> > > much larger than that, e.g. when there are DT properties that include
> > > blobs like device firmware that gets loaded into hardware by a kernel
> > > driver.
> > >
> > I was not aware that we can do such things. Is there a current example of that ?
>
> I worked on a product in the distant past where the host firmware
> included the ethernet controller firmware as a DT property[1] to get around
> restrictions on redistributing the blob in the linux-firmware package.
>
> For the .dts files we distribute with the kernel, that would not make
> sense, and I don't know of any current machines that do this in their
> system firmware.
>
> > > Is there anything stopping you from parsing the FDT in its original
> > > location without the extra copy before it gets unflattened?
> >
> > That's what the original code was doing. A fixmap entry was added to
> > map the original fdt
> > location to a virtual so that parse_dtb can be operated on a virtual
> > address. But we can't map
> > both FDT & early ioremap within a single PMD region( 2MB ). That's why
> > we removed the DT
> > mapping from the fixmap to .bss section. The other alternate option is
> > to increase the fixmap space to 4MB which seems more fragile.
>
> Could the original location just be part of the regular linear mapping of all
> RAM?
No. Because we don't map the entire RAM until setup_vm_final().
We need to parse DT before setup_vm_final() to get the memblocks and
reserved memory regions.
> I'm not too familiar with the early mapping code myself, so it may not
> be possible, but that would be the most logical place where I'd put it.
>
> Arnd
>
> [1] drivers/net/ethernet/toshiba/spider_net.c
--
Regards,
Atish
On Tue, Jul 21, 2020 at 6:18 AM Atish Patra <[email protected]> wrote:
> On Sat, Jul 18, 2020 at 2:24 AM Arnd Bergmann <[email protected]> wrote:
> > On Sat, Jul 18, 2020 at 3:05 AM Atish Patra <[email protected]> wrote:
> > > That's what the original code was doing. A fixmap entry was added to
> > > map the original fdt
> > > location to a virtual so that parse_dtb can be operated on a virtual
> > > address. But we can't map
> > > both FDT & early ioremap within a single PMD region( 2MB ). That's why
> > > we removed the DT
> > > mapping from the fixmap to .bss section. The other alternate option is
> > > to increase the fixmap space to 4MB which seems more fragile.
> >
> > Could the original location just be part of the regular linear mapping of all
> > RAM?
>
> No. Because we don't map the entire RAM until setup_vm_final().
> We need to parse DT before setup_vm_final() to get the memblocks and
> reserved memory regions.
Ok, I see how you create a direct mapping for the kernel image, plus
the fixmap for the dtb in setup_vm(), and how moving the dtb into the
kernel image simplifies that.
I'm still wondering why you can't do the same kind of PGD mapping
for the dtb that you do for the vmlinux, creating linear page table
entries exactly for the location that holds the dtb, from dtb_pa to
dtb_pa+((struct fdt_header*)dtb_pa)->totalsize.
Arnd
On Tue, 21 Jul 2020 at 11:57, Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Jul 21, 2020 at 6:18 AM Atish Patra <[email protected]> wrote:
> > On Sat, Jul 18, 2020 at 2:24 AM Arnd Bergmann <[email protected]> wrote:
> > > On Sat, Jul 18, 2020 at 3:05 AM Atish Patra <[email protected]> wrote:
> > > > That's what the original code was doing. A fixmap entry was added to
> > > > map the original fdt
> > > > location to a virtual so that parse_dtb can be operated on a virtual
> > > > address. But we can't map
> > > > both FDT & early ioremap within a single PMD region( 2MB ). That's why
> > > > we removed the DT
> > > > mapping from the fixmap to .bss section. The other alternate option is
> > > > to increase the fixmap space to 4MB which seems more fragile.
> > >
> > > Could the original location just be part of the regular linear mapping of all
> > > RAM?
> >
> > No. Because we don't map the entire RAM until setup_vm_final().
> > We need to parse DT before setup_vm_final() to get the memblocks and
> > reserved memory regions.
>
> Ok, I see how you create a direct mapping for the kernel image, plus
> the fixmap for the dtb in setup_vm(), and how moving the dtb into the
> kernel image simplifies that.
>
> I'm still wondering why you can't do the same kind of PGD mapping
> for the dtb that you do for the vmlinux, creating linear page table
> entries exactly for the location that holds the dtb, from dtb_pa to
> dtb_pa+((struct fdt_header*)dtb_pa)->totalsize.
>
On arm64, we limit the size of the DT to 2MB, and reserve a pair of
PMD entries adjacent to the fixmap so we can map it r/o statically
using huge pages without using fixmap/early_ioremap slots. (Using a
pair of PMD entries allows the DT to appear at any alignment in
memory, given that PMD entries cover 2 MB each on 4k pages kernels)
On Tue, Jul 21, 2020 at 2:32 PM Ard Biesheuvel <[email protected]> wrote:
>
> On Tue, 21 Jul 2020 at 11:57, Arnd Bergmann <[email protected]> wrote:
> >
> > On Tue, Jul 21, 2020 at 6:18 AM Atish Patra <[email protected]> wrote:
> > > On Sat, Jul 18, 2020 at 2:24 AM Arnd Bergmann <[email protected]> wrote:
> > > > On Sat, Jul 18, 2020 at 3:05 AM Atish Patra <[email protected]> wrote:
> > > > > That's what the original code was doing. A fixmap entry was added to
> > > > > map the original fdt
> > > > > location to a virtual so that parse_dtb can be operated on a virtual
> > > > > address. But we can't map
> > > > > both FDT & early ioremap within a single PMD region( 2MB ). That's why
> > > > > we removed the DT
> > > > > mapping from the fixmap to .bss section. The other alternate option is
> > > > > to increase the fixmap space to 4MB which seems more fragile.
> > > >
> > > > Could the original location just be part of the regular linear mapping of all
> > > > RAM?
> > >
> > > No. Because we don't map the entire RAM until setup_vm_final().
> > > We need to parse DT before setup_vm_final() to get the memblocks and
> > > reserved memory regions.
> >
> > Ok, I see how you create a direct mapping for the kernel image, plus
> > the fixmap for the dtb in setup_vm(), and how moving the dtb into the
> > kernel image simplifies that.
> >
> > I'm still wondering why you can't do the same kind of PGD mapping
> > for the dtb that you do for the vmlinux, creating linear page table
> > entries exactly for the location that holds the dtb, from dtb_pa to
> > dtb_pa+((struct fdt_header*)dtb_pa)->totalsize.
> >
>
> On arm64, we limit the size of the DT to 2MB, and reserve a pair of
> PMD entries adjacent to the fixmap so we can map it r/o statically
> using huge pages without using fixmap/early_ioremap slots. (Using a
> pair of PMD entries allows the DT to appear at any alignment in
> memory, given that PMD entries cover 2 MB each on 4k pages kernels)
arch/riscv is common to both RV32 and RV64. On RV32, we don't have a PMD
level because the page table has only two levels.
That said, I like the idea of two consecutive PMD mappings which are not
part of the fixmap. The RISC-V early page table is totally different from
the final init_mm page table. I think we can do two consecutive PGD mappings
in the early page table at lower addresses (well below PAGE_OFFSET). I
will play around with this idea.
Regards,
Anup