This series aims at improving kernel permissions by doing following things.
1. Protect kernel sections early instead of after /init.
2. Protect .init.text & .init.data sections with appropriate permissions.
3. Move dynamic relocation section to _init.
This series is based on Guo's static object fixes[1].
[1]https://patchwork.kernel.org/project/linux-riscv/list/?series=360951
Atish Patra (5):
RISC-V: Move __start_kernel to .head.text
RISC-V: Initialize SBI early
RISC-V: Enforce protections for kernel sections early
RISC-V: Protect .init.text & .init.data
RISC-V: Move dynamic relocation section under __init
arch/riscv/include/asm/sections.h | 2 ++
arch/riscv/include/asm/set_memory.h | 4 ++++
arch/riscv/kernel/head.S | 1 -
arch/riscv/kernel/setup.c | 13 +++++++++----
arch/riscv/kernel/vmlinux.lds.S | 18 +++++++++++++-----
arch/riscv/mm/init.c | 17 +++++++++++++++--
arch/riscv/mm/pageattr.c | 6 ++++++
7 files changed, 49 insertions(+), 12 deletions(-)
--
2.25.1
Currently, SBI is initialized towards the end of arch setup. This prevents
the set memory operations to be invoked earlier as it requires a full tlb
flush.
Initialize SBI as early as possible.
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/kernel/setup.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 1db17f37736e..a5cac440aadf 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -90,6 +90,9 @@ void __init setup_arch(char **cmdline_p)
pr_err("No DTB found in kernel mappings\n");
#endif
+#if IS_ENABLED(CONFIG_RISCV_SBI)
+ sbi_init();
+#endif
#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
#endif
@@ -98,10 +101,6 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
#endif
-#if IS_ENABLED(CONFIG_RISCV_SBI)
- sbi_init();
-#endif
-
#ifdef CONFIG_SMP
setup_smp();
#endif
--
2.25.1
Currently, __start_kernel is kept in _init while _start is in head section.
This may result in "relocation truncated to fit error" if _init section is
moved far from head. It also makes sense to keep entire head.S in one
section.
Keep __start_kernel in head section rather than _init.
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/kernel/head.S | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 3631147732ee..e820b0c09528 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -178,7 +178,6 @@ setup_trap_vector:
END(_start)
- __INIT
ENTRY(_start_kernel)
/* Mask all interrupts */
csrw CSR_IE, zero
--
2.25.1
Dynamic relocation section are only required during boot. Those sections
can be freed after init. Thus, it can be moved to __init section.
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/kernel/vmlinux.lds.S | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 15b9882588ae..4aedb4fd79e5 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -60,6 +60,10 @@ SECTIONS
}
PERCPU_SECTION(L1_CACHE_BYTES)
+ .rel.dyn : {
+ *(.rel.dyn*)
+ }
+
__init_data_end = .;
__init_end = .;
@@ -112,10 +116,6 @@ SECTIONS
BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
- .rel.dyn : {
- *(.rel.dyn*)
- }
-
#ifdef CONFIG_EFI
. = ALIGN(PECOFF_SECTION_ALIGNMENT);
__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
--
2.25.1
Currently, .init.text & .init.data are intermixed which makes it impossible
apply different permissions to them. .init.data shouldn't need exec
permissions while .init.text shouldn't have write permission.
Keep them in separate sections so that different permissions are applied to
each section. This improves the kernel protection under
CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
entire _init section after it is freed so that those pages can be used for
other purpose.
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/sections.h | 2 ++
arch/riscv/include/asm/set_memory.h | 2 ++
arch/riscv/kernel/setup.c | 4 ++++
arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
arch/riscv/mm/init.c | 6 ++++++
arch/riscv/mm/pageattr.c | 6 ++++++
6 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
index d60802bfafbc..730d2c4a844d 100644
--- a/arch/riscv/include/asm/sections.h
+++ b/arch/riscv/include/asm/sections.h
@@ -10,6 +10,8 @@
#include <asm-generic/sections.h>
extern char _start[];
extern char _start_kernel[];
+extern char __init_data_begin[], __init_data_end[];
+extern char __init_text_begin[], __init_text_end[];
extern bool init_mem_is_free;
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4cc3a4e2afd3..913429c9c1ae 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_default(unsigned long addr, int numpages);
void protect_kernel_text_data(void);
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
@@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
static inline void protect_kernel_text_data(void) {};
+static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
#endif
int set_direct_map_invalid_noflush(struct page *page);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 4176a2affd1d..b8a35ef0eab0 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -129,6 +129,10 @@ bool init_mem_is_free = false;
void free_initmem(void)
{
+ unsigned long init_begin = (unsigned long)__init_begin;
+ unsigned long init_end = (unsigned long)__init_end;
+
+ set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
free_initmem_default(POISON_FREE_INITMEM);
init_mem_is_free = true;
}
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 0807633f0dc8..15b9882588ae 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -30,8 +30,8 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__init_begin = .;
+ __init_text_begin = .;
INIT_TEXT_SECTION(PAGE_SIZE)
- INIT_DATA_SECTION(16)
. = ALIGN(8);
__soc_early_init_table : {
__soc_early_init_table_start = .;
@@ -48,11 +48,19 @@ SECTIONS
{
EXIT_TEXT
}
+
+ __init_text_end = .;
+ . = ALIGN(SECTION_ALIGN);
+ /* Start of init data section */
+ __init_data_begin = .;
+ INIT_DATA_SECTION(16)
.exit.data :
{
EXIT_DATA
}
PERCPU_SECTION(L1_CACHE_BYTES)
+
+ __init_data_end = .;
__init_end = .;
. = ALIGN(SECTION_ALIGN);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 7859a1d1b34d..3ef0eafcc7c7 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
{
unsigned long text_start = (unsigned long)_text;
unsigned long text_end = (unsigned long)_etext;
+ unsigned long init_text_start = (unsigned long)__init_text_begin;
+ unsigned long init_text_end = (unsigned long)__init_text_end;
+ unsigned long init_data_start = (unsigned long)__init_data_begin;
+ unsigned long init_data_end = (unsigned long)__init_data_end;
unsigned long rodata_start = (unsigned long)__start_rodata;
unsigned long data_start = (unsigned long)_data;
unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+ set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+ set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 19fecb362d81..aecedaf086ab 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
return ret;
}
+int set_memory_default(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
+ __pgprot(0));
+}
+
int set_memory_ro(unsigned long addr, int numpages)
{
return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
--
2.25.1
Currently, all memblocks are mapped with PAGE_KERNEL_EXEC and the strict
permissions are only enforced after /init starts. This leaves the kernel
vulnerable from possible buggy built-in modules.
Apply permissions to individual sections as early as possible.
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/set_memory.h | 2 ++
arch/riscv/kernel/setup.c | 2 ++
arch/riscv/mm/init.c | 11 +++++++++--
3 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4c5bae7ca01c..4cc3a4e2afd3 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -15,11 +15,13 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+void protect_kernel_text_data(void);
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+static inline void protect_kernel_text_data(void) {};
#endif
int set_direct_map_invalid_noflush(struct page *page);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index a5cac440aadf..4176a2affd1d 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -23,6 +23,7 @@
#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
#include <asm/setup.h>
+#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
@@ -93,6 +94,7 @@ void __init setup_arch(char **cmdline_p)
#if IS_ENABLED(CONFIG_RISCV_SBI)
sbi_init();
#endif
+ protect_kernel_text_data();
#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
#endif
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index c888c4470b34..7859a1d1b34d 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -623,7 +623,7 @@ static inline void setup_vm_final(void)
#endif /* CONFIG_MMU */
#ifdef CONFIG_STRICT_KERNEL_RWX
-void mark_rodata_ro(void)
+void protect_kernel_text_data(void)
{
unsigned long text_start = (unsigned long)_text;
unsigned long text_end = (unsigned long)_etext;
@@ -632,9 +632,16 @@ void mark_rodata_ro(void)
unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
- set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+}
+
+void mark_rodata_ro(void)
+{
+ unsigned long rodata_start = (unsigned long)__start_rodata;
+ unsigned long data_start = (unsigned long)_data;
+
+ set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
debug_checkwx();
}
--
2.25.1
Atish Patra <[email protected]> 於 2020年10月10日 週六 上午5:13寫道:
>
> Currently, .init.text & .init.data are intermixed which makes it impossible
> apply different permissions to them. .init.data shouldn't need exec
> permissions while .init.text shouldn't have write permission.
>
> Keep them in separate sections so that different permissions are applied to
> each section. This improves the kernel protection under
> CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> entire _init section after it is freed so that those pages can be used for
> other purpose.
>
> Signed-off-by: Atish Patra <[email protected]>
> ---
> arch/riscv/include/asm/sections.h | 2 ++
> arch/riscv/include/asm/set_memory.h | 2 ++
> arch/riscv/kernel/setup.c | 4 ++++
> arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
> arch/riscv/mm/init.c | 6 ++++++
> arch/riscv/mm/pageattr.c | 6 ++++++
> 6 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> index d60802bfafbc..730d2c4a844d 100644
> --- a/arch/riscv/include/asm/sections.h
> +++ b/arch/riscv/include/asm/sections.h
> @@ -10,6 +10,8 @@
> #include <asm-generic/sections.h>
> extern char _start[];
> extern char _start_kernel[];
> +extern char __init_data_begin[], __init_data_end[];
> +extern char __init_text_begin[], __init_text_end[];
>
> extern bool init_mem_is_free;
>
> diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> index 4cc3a4e2afd3..913429c9c1ae 100644
> --- a/arch/riscv/include/asm/set_memory.h
> +++ b/arch/riscv/include/asm/set_memory.h
> @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> int set_memory_rw(unsigned long addr, int numpages);
> int set_memory_x(unsigned long addr, int numpages);
> int set_memory_nx(unsigned long addr, int numpages);
> +int set_memory_default(unsigned long addr, int numpages);
> void protect_kernel_text_data(void);
> #else
> static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> static inline void protect_kernel_text_data(void) {};
> +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> #endif
>
> int set_direct_map_invalid_noflush(struct page *page);
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 4176a2affd1d..b8a35ef0eab0 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
>
> void free_initmem(void)
> {
> + unsigned long init_begin = (unsigned long)__init_begin;
> + unsigned long init_end = (unsigned long)__init_end;
> +
> + set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> free_initmem_default(POISON_FREE_INITMEM);
> init_mem_is_free = true;
> }
> diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> index 0807633f0dc8..15b9882588ae 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -30,8 +30,8 @@ SECTIONS
> . = ALIGN(PAGE_SIZE);
>
> __init_begin = .;
> + __init_text_begin = .;
> INIT_TEXT_SECTION(PAGE_SIZE)
> - INIT_DATA_SECTION(16)
> . = ALIGN(8);
> __soc_early_init_table : {
> __soc_early_init_table_start = .;
> @@ -48,11 +48,19 @@ SECTIONS
> {
> EXIT_TEXT
> }
> +
> + __init_text_end = .;
> + . = ALIGN(SECTION_ALIGN);
> + /* Start of init data section */
> + __init_data_begin = .;
> + INIT_DATA_SECTION(16)
> .exit.data :
> {
> EXIT_DATA
> }
> PERCPU_SECTION(L1_CACHE_BYTES)
> +
> + __init_data_end = .;
> __init_end = .;
>
> . = ALIGN(SECTION_ALIGN);
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 7859a1d1b34d..3ef0eafcc7c7 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> {
> unsigned long text_start = (unsigned long)_text;
> unsigned long text_end = (unsigned long)_etext;
> + unsigned long init_text_start = (unsigned long)__init_text_begin;
> + unsigned long init_text_end = (unsigned long)__init_text_end;
> + unsigned long init_data_start = (unsigned long)__init_data_begin;
> + unsigned long init_data_end = (unsigned long)__init_data_end;
> unsigned long rodata_start = (unsigned long)__start_rodata;
> unsigned long data_start = (unsigned long)_data;
> unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
>
> + set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> + set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> }
> diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> index 19fecb362d81..aecedaf086ab 100644
> --- a/arch/riscv/mm/pageattr.c
> +++ b/arch/riscv/mm/pageattr.c
> @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> return ret;
> }
>
> +int set_memory_default(unsigned long addr, int numpages)
> +{
> + return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> + __pgprot(0));
> +}
> +
> int set_memory_ro(unsigned long addr, int numpages)
> {
> return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
Hi Atish,
I tested this patchset and CONFIG_DEBUG_WX=y
[ 2.664012] Freeing unused kernel memory: 114420K
[ 2.666081] ------------[ cut here ]------------
[ 2.666779] riscv/mm: Found insecure W+X mapping at address
(____ptrval____)/0xffffffe000000000
[ 2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
note_page+0xc2/0x238
[ 2.669147] Modules linked in:
[ 2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.9.0-rc8-00033-gfacf070a80ea #153
[ 2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
[ 2.671054] gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
ffffffe007e861d0
[ 2.671683] t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
ffffffe0fba9bc60
[ 2.672499] s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
0000000000000020
[ 2.673119] a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
d4328dc070ccb100
[ 2.673729] a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
ffffffe007851e98
[ 2.674333] s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
0000000087200000
[ 2.674963] s5 : 00000000000000cb s6 : 0000000000000003 s7 :
ffffffe007e7ac00
[ 2.675564] s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
ffffffe000000000
[ 2.676392] s11: ffffffe040000000 t3 : 000000000001da48 t4 :
000000000001da48
[ 2.676992] t5 : ffffffe007e74490 t6 : ffffffe007e81432
[ 2.677449] status: 0000000000000120 badaddr: 0000000000000000
cause: 0000000000000003
[ 2.678128] ---[ end trace 5f0a86dbe986db9b ]---
[ 2.678952] Checked W+X mappings: failed, 28672 W+X pages found
[ 2.679737] Run /init as init process
After debugging, I found it is because
set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
address if it is a 2MB mapping address.
For example, if we gave 0xffffffe000001000 and this address is in a
PMD mapping which is 2MB mapping, it will set the page attributes from
0xffffffe000000000.
And this caused the CPU hotplug failed because it is in head.text section.
#0 pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
addr=18446743936270602240, next=18446743936270766080,
walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
70 pmd_t val = READ_ONCE(*pmd);
=> 0xffffffe007005e96 <pageattr_pmd_entry+0>: 41 11 addi sp,sp,-16
0xffffffe007005e98 <pageattr_pmd_entry+2>: 22 e4 sd s0,8(sp)
0xffffffe007005e9a <pageattr_pmd_entry+4>: 00 08 addi s0,sp,16
0xffffffe007005e9c <pageattr_pmd_entry+6>: 1c 61 ld a5,0(a0)
(gdb) p/x addr
$21 = 0xffffffe000001000
# cat /sys/kernel/debug/kernel_page_tables
---[ Fixmap start ]---
0xffffffcefef00000-0xffffffceff000000 0x0000000082200000 1M
PTE D A . . . W R V
---[ Fixmap end ]---
---[ PCI I/O start ]---
---[ PCI I/O end ]---
---[ vmalloc() area ]---
0xffffffd000090000-0xffffffd000093000 0x00000001741bb000 12K
PTE D A . . . W R V
0xffffffdffffbb000-0xffffffdffffbe000 0x0000000175f97000 12K
PTE D A . . . W R V
0xffffffdffffcc000-0xffffffdffffcf000 0x0000000175f9a000 12K
PTE D A . . . W R V
0xffffffdffffdd000-0xffffffdffffe0000 0x0000000175f9d000 12K
PTE D A . . . W R V
0xffffffdffffee000-0xffffffdfffff1000 0x0000000175fa0000 12K
PTE D A . . . W R V
---[ vmalloc() end ]---
---[ Linear mapping ]---
0xffffffe000000000-0xffffffe007000000 0x0000000080200000 112M
PMD D A . . X W R V
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
0xffffffe007000000-0xffffffe007800000 0x0000000087200000 8M
PMD D A . . X . R V
0xffffffe007800000-0xffffffe007e00000 0x0000000087a00000 6M
PMD D A . . . . R V
0xffffffe007e00000-0xffffffe0ffe00000 0x0000000088000000 3968M
PMD D A . . . W R V
#
Any idea?
On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <[email protected]> wrote:
>
> Atish Patra <[email protected]> 於 2020年10月10日 週六 上午5:13寫道:
> >
> > Currently, .init.text & .init.data are intermixed which makes it impossible
> > apply different permissions to them. .init.data shouldn't need exec
> > permissions while .init.text shouldn't have write permission.
> >
> > Keep them in separate sections so that different permissions are applied to
> > each section. This improves the kernel protection under
> > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > entire _init section after it is freed so that those pages can be used for
> > other purpose.
> >
> > Signed-off-by: Atish Patra <[email protected]>
> > ---
> > arch/riscv/include/asm/sections.h | 2 ++
> > arch/riscv/include/asm/set_memory.h | 2 ++
> > arch/riscv/kernel/setup.c | 4 ++++
> > arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
> > arch/riscv/mm/init.c | 6 ++++++
> > arch/riscv/mm/pageattr.c | 6 ++++++
> > 6 files changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > index d60802bfafbc..730d2c4a844d 100644
> > --- a/arch/riscv/include/asm/sections.h
> > +++ b/arch/riscv/include/asm/sections.h
> > @@ -10,6 +10,8 @@
> > #include <asm-generic/sections.h>
> > extern char _start[];
> > extern char _start_kernel[];
> > +extern char __init_data_begin[], __init_data_end[];
> > +extern char __init_text_begin[], __init_text_end[];
> >
> > extern bool init_mem_is_free;
> >
> > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > index 4cc3a4e2afd3..913429c9c1ae 100644
> > --- a/arch/riscv/include/asm/set_memory.h
> > +++ b/arch/riscv/include/asm/set_memory.h
> > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > int set_memory_rw(unsigned long addr, int numpages);
> > int set_memory_x(unsigned long addr, int numpages);
> > int set_memory_nx(unsigned long addr, int numpages);
> > +int set_memory_default(unsigned long addr, int numpages);
> > void protect_kernel_text_data(void);
> > #else
> > static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > static inline void protect_kernel_text_data(void) {};
> > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > #endif
> >
> > int set_direct_map_invalid_noflush(struct page *page);
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index 4176a2affd1d..b8a35ef0eab0 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> >
> > void free_initmem(void)
> > {
> > + unsigned long init_begin = (unsigned long)__init_begin;
> > + unsigned long init_end = (unsigned long)__init_end;
> > +
> > + set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > free_initmem_default(POISON_FREE_INITMEM);
> > init_mem_is_free = true;
> > }
> > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > index 0807633f0dc8..15b9882588ae 100644
> > --- a/arch/riscv/kernel/vmlinux.lds.S
> > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > @@ -30,8 +30,8 @@ SECTIONS
> > . = ALIGN(PAGE_SIZE);
> >
> > __init_begin = .;
> > + __init_text_begin = .;
> > INIT_TEXT_SECTION(PAGE_SIZE)
> > - INIT_DATA_SECTION(16)
> > . = ALIGN(8);
> > __soc_early_init_table : {
> > __soc_early_init_table_start = .;
> > @@ -48,11 +48,19 @@ SECTIONS
> > {
> > EXIT_TEXT
> > }
> > +
> > + __init_text_end = .;
> > + . = ALIGN(SECTION_ALIGN);
> > + /* Start of init data section */
> > + __init_data_begin = .;
> > + INIT_DATA_SECTION(16)
> > .exit.data :
> > {
> > EXIT_DATA
> > }
> > PERCPU_SECTION(L1_CACHE_BYTES)
> > +
> > + __init_data_end = .;
> > __init_end = .;
> >
> > . = ALIGN(SECTION_ALIGN);
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > {
> > unsigned long text_start = (unsigned long)_text;
> > unsigned long text_end = (unsigned long)_etext;
> > + unsigned long init_text_start = (unsigned long)__init_text_begin;
> > + unsigned long init_text_end = (unsigned long)__init_text_end;
> > + unsigned long init_data_start = (unsigned long)__init_data_begin;
> > + unsigned long init_data_end = (unsigned long)__init_data_end;
> > unsigned long rodata_start = (unsigned long)__start_rodata;
> > unsigned long data_start = (unsigned long)_data;
> > unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> >
> > + set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > + set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > }
> > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > index 19fecb362d81..aecedaf086ab 100644
> > --- a/arch/riscv/mm/pageattr.c
> > +++ b/arch/riscv/mm/pageattr.c
> > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > return ret;
> > }
> >
> > +int set_memory_default(unsigned long addr, int numpages)
> > +{
> > + return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > + __pgprot(0));
> > +}
> > +
> > int set_memory_ro(unsigned long addr, int numpages)
> > {
> > return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
>
>
> Hi Atish,
>
> I tested this patchset and CONFIG_DEBUG_WX=y
>
Thanks for testing the patch.
> [ 2.664012] Freeing unused kernel memory: 114420K
> [ 2.666081] ------------[ cut here ]------------
> [ 2.666779] riscv/mm: Found insecure W+X mapping at address
> (____ptrval____)/0xffffffe000000000
> [ 2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> note_page+0xc2/0x238
> [ 2.669147] Modules linked in:
> [ 2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> 5.9.0-rc8-00033-gfacf070a80ea #153
> [ 2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> [ 2.671054] gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> ffffffe007e861d0
> [ 2.671683] t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> ffffffe0fba9bc60
> [ 2.672499] s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> 0000000000000020
> [ 2.673119] a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> d4328dc070ccb100
> [ 2.673729] a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> ffffffe007851e98
> [ 2.674333] s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> 0000000087200000
> [ 2.674963] s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> ffffffe007e7ac00
> [ 2.675564] s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> ffffffe000000000
> [ 2.676392] s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> 000000000001da48
> [ 2.676992] t5 : ffffffe007e74490 t6 : ffffffe007e81432
> [ 2.677449] status: 0000000000000120 badaddr: 0000000000000000
> cause: 0000000000000003
> [ 2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> [ 2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> [ 2.679737] Run /init as init process
>
This would be triggered for the current kernel as well as we don't
protect the permission for __init section at all. As you correctly
pointed out, __init & .head sections are mapped with same permissions
because of 2MB mappings.
Currently, the entire head.text & init section have RWX permission.
This patch is trying remove the write permission from .init.text &
.head.text and execute permission from .init.data until kernel is
booted.
It doesn't provide full protection but better than the current scheme.
To remove the write permission of .head.txt only, we have to keep
.head.txt & .init.text section in separate sections. The linear
mapping would look like this in that case.
There are no issues as such but kernel size would increase by another 2M.
---[ Linear mapping ]---
0xffffffe000000000-0xffffffe000200000 0x0000000080200000 2M
PMD D A . . X . R V
0xffffffe000200000-0xffffffe000600000 0x0000000080400000 4M
PMD D A . . . W R V
0xffffffe000600000-0xffffffe000e00000 0x0000000080800000 8M
PMD D A . . X . R V
0xffffffe000e00000-0xffffffe001400000 0x0000000081000000 6M
PMD D A . . . . R V
0xffffffe001400000-0xffffffe03fe00000 0x0000000081600000 1002M
PMD D A . . . W R V
The other solution would be move init section below text. Keep text &
head in one section.
The .init.text & .init.data will be in separate sections after that.
Here is the mapping in that case.
---[ Linear mapping ]---
0xffffffe000000000-0xffffffe000800000 0x0000000080200000 8M
PMD D A . . X . R V
0xffffffe000800000-0xffffffe000c00000 0x0000000080a00000 4M
PMD D A . . . W R V
0xffffffe000c00000-0xffffffe001200000 0x0000000080e00000 6M
PMD D A . . . . R V
0xffffffe001200000-0xffffffe03fe00000 0x0000000081400000 1004M
PMD D A . . . W R V
I prefer the 2nd approach compared to the first one as it saves memory
and we can fix lockdep issue without adding arch_is_kernel_data
to sections.h (Guo's patch).
However, the 2nd approach throws this problem if
CONFIG_HARDENED_USERCOPY is enabled.
net/ipv4/ipconfig.o: in function `ic_setup_routes':
/home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
defined in .text section in net/ipv4/fib_frontend.o
make: *** [Makefile:1162: vmlinux] Error 1
I am currently looking into this to understand why R_RISCV_JAL is
generated a generic function invocation
where auipc + jalr should have been used.
> After debugging, I found it is because
> set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
> address if it is a 2MB mapping address.
> For example, if we gave 0xffffffe000001000 and this address is in a
> PMD mapping which is 2MB mapping, it will set the page attributes from
> 0xffffffe000000000.
> And this caused the CPU hotplug failed because it is in head.text section.
>
> #0 pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
> addr=18446743936270602240, next=18446743936270766080,
> walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
> 70 pmd_t val = READ_ONCE(*pmd);
> => 0xffffffe007005e96 <pageattr_pmd_entry+0>: 41 11 addi sp,sp,-16
> 0xffffffe007005e98 <pageattr_pmd_entry+2>: 22 e4 sd s0,8(sp)
> 0xffffffe007005e9a <pageattr_pmd_entry+4>: 00 08 addi s0,sp,16
> 0xffffffe007005e9c <pageattr_pmd_entry+6>: 1c 61 ld a5,0(a0)
> (gdb) p/x addr
> $21 = 0xffffffe000001000
>
> # cat /sys/kernel/debug/kernel_page_tables
> ---[ Fixmap start ]---
> 0xffffffcefef00000-0xffffffceff000000 0x0000000082200000 1M
> PTE D A . . . W R V
> ---[ Fixmap end ]---
> ---[ PCI I/O start ]---
> ---[ PCI I/O end ]---
> ---[ vmalloc() area ]---
> 0xffffffd000090000-0xffffffd000093000 0x00000001741bb000 12K
> PTE D A . . . W R V
> 0xffffffdffffbb000-0xffffffdffffbe000 0x0000000175f97000 12K
> PTE D A . . . W R V
> 0xffffffdffffcc000-0xffffffdffffcf000 0x0000000175f9a000 12K
> PTE D A . . . W R V
> 0xffffffdffffdd000-0xffffffdffffe0000 0x0000000175f9d000 12K
> PTE D A . . . W R V
> 0xffffffdffffee000-0xffffffdfffff1000 0x0000000175fa0000 12K
> PTE D A . . . W R V
> ---[ vmalloc() end ]---
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe007000000 0x0000000080200000 112M
> PMD D A . . X W R V
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 0xffffffe007000000-0xffffffe007800000 0x0000000087200000 8M
> PMD D A . . X . R V
> 0xffffffe007800000-0xffffffe007e00000 0x0000000087a00000 6M
> PMD D A . . . . R V
> 0xffffffe007e00000-0xffffffe0ffe00000 0x0000000088000000 3968M
> PMD D A . . . W R V
> #
>
> Any idea?
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv
--
Regards,
Atish
On Mon, Oct 12, 2020 at 4:26 PM Atish Patra <[email protected]> wrote:
>
> On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <[email protected]> wrote:
> >
> > Atish Patra <[email protected]> 於 2020年10月10日 週六 上午5:13寫道:
> > >
> > > Currently, .init.text & .init.data are intermixed which makes it impossible
> > > apply different permissions to them. .init.data shouldn't need exec
> > > permissions while .init.text shouldn't have write permission.
> > >
> > > Keep them in separate sections so that different permissions are applied to
> > > each section. This improves the kernel protection under
> > > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > > entire _init section after it is freed so that those pages can be used for
> > > other purpose.
> > >
> > > Signed-off-by: Atish Patra <[email protected]>
> > > ---
> > > arch/riscv/include/asm/sections.h | 2 ++
> > > arch/riscv/include/asm/set_memory.h | 2 ++
> > > arch/riscv/kernel/setup.c | 4 ++++
> > > arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
> > > arch/riscv/mm/init.c | 6 ++++++
> > > arch/riscv/mm/pageattr.c | 6 ++++++
> > > 6 files changed, 29 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > > index d60802bfafbc..730d2c4a844d 100644
> > > --- a/arch/riscv/include/asm/sections.h
> > > +++ b/arch/riscv/include/asm/sections.h
> > > @@ -10,6 +10,8 @@
> > > #include <asm-generic/sections.h>
> > > extern char _start[];
> > > extern char _start_kernel[];
> > > +extern char __init_data_begin[], __init_data_end[];
> > > +extern char __init_text_begin[], __init_text_end[];
> > >
> > > extern bool init_mem_is_free;
> > >
> > > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > > index 4cc3a4e2afd3..913429c9c1ae 100644
> > > --- a/arch/riscv/include/asm/set_memory.h
> > > +++ b/arch/riscv/include/asm/set_memory.h
> > > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > > int set_memory_rw(unsigned long addr, int numpages);
> > > int set_memory_x(unsigned long addr, int numpages);
> > > int set_memory_nx(unsigned long addr, int numpages);
> > > +int set_memory_default(unsigned long addr, int numpages);
> > > void protect_kernel_text_data(void);
> > > #else
> > > static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > > static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > > static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > > static inline void protect_kernel_text_data(void) {};
> > > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > > #endif
> > >
> > > int set_direct_map_invalid_noflush(struct page *page);
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4176a2affd1d..b8a35ef0eab0 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> > >
> > > void free_initmem(void)
> > > {
> > > + unsigned long init_begin = (unsigned long)__init_begin;
> > > + unsigned long init_end = (unsigned long)__init_end;
> > > +
> > > + set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > > free_initmem_default(POISON_FREE_INITMEM);
> > > init_mem_is_free = true;
> > > }
> > > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > > index 0807633f0dc8..15b9882588ae 100644
> > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > @@ -30,8 +30,8 @@ SECTIONS
> > > . = ALIGN(PAGE_SIZE);
> > >
> > > __init_begin = .;
> > > + __init_text_begin = .;
> > > INIT_TEXT_SECTION(PAGE_SIZE)
> > > - INIT_DATA_SECTION(16)
> > > . = ALIGN(8);
> > > __soc_early_init_table : {
> > > __soc_early_init_table_start = .;
> > > @@ -48,11 +48,19 @@ SECTIONS
> > > {
> > > EXIT_TEXT
> > > }
> > > +
> > > + __init_text_end = .;
> > > + . = ALIGN(SECTION_ALIGN);
> > > + /* Start of init data section */
> > > + __init_data_begin = .;
> > > + INIT_DATA_SECTION(16)
> > > .exit.data :
> > > {
> > > EXIT_DATA
> > > }
> > > PERCPU_SECTION(L1_CACHE_BYTES)
> > > +
> > > + __init_data_end = .;
> > > __init_end = .;
> > >
> > > . = ALIGN(SECTION_ALIGN);
> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > > --- a/arch/riscv/mm/init.c
> > > +++ b/arch/riscv/mm/init.c
> > > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > > {
> > > unsigned long text_start = (unsigned long)_text;
> > > unsigned long text_end = (unsigned long)_etext;
> > > + unsigned long init_text_start = (unsigned long)__init_text_begin;
> > > + unsigned long init_text_end = (unsigned long)__init_text_end;
> > > + unsigned long init_data_start = (unsigned long)__init_data_begin;
> > > + unsigned long init_data_end = (unsigned long)__init_data_end;
> > > unsigned long rodata_start = (unsigned long)__start_rodata;
> > > unsigned long data_start = (unsigned long)_data;
> > > unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> > >
> > > + set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > > set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > > + set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > > set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > > set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > > }
> > > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > > index 19fecb362d81..aecedaf086ab 100644
> > > --- a/arch/riscv/mm/pageattr.c
> > > +++ b/arch/riscv/mm/pageattr.c
> > > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > > return ret;
> > > }
> > >
> > > +int set_memory_default(unsigned long addr, int numpages)
> > > +{
> > > + return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > > + __pgprot(0));
> > > +}
> > > +
> > > int set_memory_ro(unsigned long addr, int numpages)
> > > {
> > > return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> >
> >
> > Hi Atish,
> >
> > I tested this patchset and CONFIG_DEBUG_WX=y
> >
>
> Thanks for testing the patch.
>
> > [ 2.664012] Freeing unused kernel memory: 114420K
> > [ 2.666081] ------------[ cut here ]------------
> > [ 2.666779] riscv/mm: Found insecure W+X mapping at address
> > (____ptrval____)/0xffffffe000000000
> > [ 2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> > note_page+0xc2/0x238
> > [ 2.669147] Modules linked in:
> > [ 2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > 5.9.0-rc8-00033-gfacf070a80ea #153
> > [ 2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> > [ 2.671054] gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> > ffffffe007e861d0
> > [ 2.671683] t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> > ffffffe0fba9bc60
> > [ 2.672499] s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> > 0000000000000020
> > [ 2.673119] a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> > d4328dc070ccb100
> > [ 2.673729] a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> > ffffffe007851e98
> > [ 2.674333] s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> > 0000000087200000
> > [ 2.674963] s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> > ffffffe007e7ac00
> > [ 2.675564] s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> > ffffffe000000000
> > [ 2.676392] s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> > 000000000001da48
> > [ 2.676992] t5 : ffffffe007e74490 t6 : ffffffe007e81432
> > [ 2.677449] status: 0000000000000120 badaddr: 0000000000000000
> > cause: 0000000000000003
> > [ 2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> > [ 2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> > [ 2.679737] Run /init as init process
> >
>
> This would be triggered for the current kernel as well as we don't
> protect the permission for __init section at all. As you correctly
> pointed out, __init & .head sections are mapped with same permissions
> because of 2MB mappings.
>
> Currently, the entire head.text & init section have RWX permission.
> This patch is trying remove the write permission from .init.text &
> .head.text and execute permission from .init.data until kernel is
> booted.
> It doesn't provide full protection but better than the current scheme.
>
> To remove the write permission of .head.txt only, we have to keep
> .head.txt & .init.text section in separate sections. The linear
> mapping would look like this in that case.
> There are no issues as such but kernel size would increase by another 2M.
>
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe000200000 0x0000000080200000 2M
> PMD D A . . X . R V
> 0xffffffe000200000-0xffffffe000600000 0x0000000080400000 4M
> PMD D A . . . W R V
> 0xffffffe000600000-0xffffffe000e00000 0x0000000080800000 8M
> PMD D A . . X . R V
> 0xffffffe000e00000-0xffffffe001400000 0x0000000081000000 6M
> PMD D A . . . . R V
> 0xffffffe001400000-0xffffffe03fe00000 0x0000000081600000 1002M
> PMD D A . . . W R V
>
> The other solution would be move init section below text. Keep text &
> head in one section.
> The .init.text & .init.data will be in separate sections after that.
> Here is the mapping in that case.
>
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe000800000 0x0000000080200000 8M
> PMD D A . . X . R V
> 0xffffffe000800000-0xffffffe000c00000 0x0000000080a00000 4M
> PMD D A . . . W R V
> 0xffffffe000c00000-0xffffffe001200000 0x0000000080e00000 6M
> PMD D A . . . . R V
> 0xffffffe001200000-0xffffffe03fe00000 0x0000000081400000 1004M
> PMD D A . . . W R V
>
> I prefer the 2nd approach compared to the first one as it saves memory
> and we can fix lockdep issue without adding arch_is_kernel_data
> to sections.h (Guo's patch).
>
> However, the 2nd approach throws this problem if
> CONFIG_HARDENED_USERCOPY is enabled.
>
> net/ipv4/ipconfig.o: in function `ic_setup_routes':
> /home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
> relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
> defined in .text section in net/ipv4/fib_frontend.o
> make: *** [Makefile:1162: vmlinux] Error 1
>
> I am currently looking into this to understand why R_RISCV_JAL is
> generated a generic function invocation
> where auipc + jalr should have been used.
>
I checked the relocation is is correct 00000000000001d0 R_RISCV_CALL
ip_rt_ioctl
The assembly from objdump also shows a pair of auipc + jalr.
1d0: 00000097 auipc ra,0x0
1d4: 000080e7 jalr ra # 1d0 <.LBE348+0x4>
I don't understand why the toolchain is complaining about relocation
error only when
CONFIG_HARDENED_USERCOPY is enabled.
Are we seeing some subtle toolchain bug ?
> > After debugging, I found it is because
> > set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
> > address if it is a 2MB mapping address.
> > For example, if we gave 0xffffffe000001000 and this address is in a
> > PMD mapping which is 2MB mapping, it will set the page attributes from
> > 0xffffffe000000000.
> > And this caused the CPU hotplug failed because it is in head.text section.
> >
> > #0 pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
> > addr=18446743936270602240, next=18446743936270766080,
> > walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
> > 70 pmd_t val = READ_ONCE(*pmd);
> > => 0xffffffe007005e96 <pageattr_pmd_entry+0>: 41 11 addi sp,sp,-16
> > 0xffffffe007005e98 <pageattr_pmd_entry+2>: 22 e4 sd s0,8(sp)
> > 0xffffffe007005e9a <pageattr_pmd_entry+4>: 00 08 addi s0,sp,16
> > 0xffffffe007005e9c <pageattr_pmd_entry+6>: 1c 61 ld a5,0(a0)
> > (gdb) p/x addr
> > $21 = 0xffffffe000001000
> >
> > # cat /sys/kernel/debug/kernel_page_tables
> > ---[ Fixmap start ]---
> > 0xffffffcefef00000-0xffffffceff000000 0x0000000082200000 1M
> > PTE D A . . . W R V
> > ---[ Fixmap end ]---
> > ---[ PCI I/O start ]---
> > ---[ PCI I/O end ]---
> > ---[ vmalloc() area ]---
> > 0xffffffd000090000-0xffffffd000093000 0x00000001741bb000 12K
> > PTE D A . . . W R V
> > 0xffffffdffffbb000-0xffffffdffffbe000 0x0000000175f97000 12K
> > PTE D A . . . W R V
> > 0xffffffdffffcc000-0xffffffdffffcf000 0x0000000175f9a000 12K
> > PTE D A . . . W R V
> > 0xffffffdffffdd000-0xffffffdffffe0000 0x0000000175f9d000 12K
> > PTE D A . . . W R V
> > 0xffffffdffffee000-0xffffffdfffff1000 0x0000000175fa0000 12K
> > PTE D A . . . W R V
> > ---[ vmalloc() end ]---
> > ---[ Linear mapping ]---
> > 0xffffffe000000000-0xffffffe007000000 0x0000000080200000 112M
> > PMD D A . . X W R V
> > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> > 0xffffffe007000000-0xffffffe007800000 0x0000000087200000 8M
> > PMD D A . . X . R V
> > 0xffffffe007800000-0xffffffe007e00000 0x0000000087a00000 6M
> > PMD D A . . . . R V
> > 0xffffffe007e00000-0xffffffe0ffe00000 0x0000000088000000 3968M
> > PMD D A . . . W R V
> > #
> >
> > Any idea?
> >
> > _______________________________________________
> > linux-riscv mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
>
>
> --
> Regards,
> Atish
--
Regards,
Atish
Atish Patra <[email protected]> 於 2020年10月13日 週二 上午9:28寫道:
>
> On Mon, Oct 12, 2020 at 4:26 PM Atish Patra <[email protected]> wrote:
> >
> > On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <[email protected]> wrote:
> > >
> > > Atish Patra <[email protected]> 於 2020年10月10日 週六 上午5:13寫道:
> > > >
> > > > Currently, .init.text & .init.data are intermixed which makes it impossible
> > > > apply different permissions to them. .init.data shouldn't need exec
> > > > permissions while .init.text shouldn't have write permission.
> > > >
> > > > Keep them in separate sections so that different permissions are applied to
> > > > each section. This improves the kernel protection under
> > > > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > > > entire _init section after it is freed so that those pages can be used for
> > > > other purpose.
> > > >
> > > > Signed-off-by: Atish Patra <[email protected]>
> > > > ---
> > > > arch/riscv/include/asm/sections.h | 2 ++
> > > > arch/riscv/include/asm/set_memory.h | 2 ++
> > > > arch/riscv/kernel/setup.c | 4 ++++
> > > > arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
> > > > arch/riscv/mm/init.c | 6 ++++++
> > > > arch/riscv/mm/pageattr.c | 6 ++++++
> > > > 6 files changed, 29 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > > > index d60802bfafbc..730d2c4a844d 100644
> > > > --- a/arch/riscv/include/asm/sections.h
> > > > +++ b/arch/riscv/include/asm/sections.h
> > > > @@ -10,6 +10,8 @@
> > > > #include <asm-generic/sections.h>
> > > > extern char _start[];
> > > > extern char _start_kernel[];
> > > > +extern char __init_data_begin[], __init_data_end[];
> > > > +extern char __init_text_begin[], __init_text_end[];
> > > >
> > > > extern bool init_mem_is_free;
> > > >
> > > > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > > > index 4cc3a4e2afd3..913429c9c1ae 100644
> > > > --- a/arch/riscv/include/asm/set_memory.h
> > > > +++ b/arch/riscv/include/asm/set_memory.h
> > > > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > > > int set_memory_rw(unsigned long addr, int numpages);
> > > > int set_memory_x(unsigned long addr, int numpages);
> > > > int set_memory_nx(unsigned long addr, int numpages);
> > > > +int set_memory_default(unsigned long addr, int numpages);
> > > > void protect_kernel_text_data(void);
> > > > #else
> > > > static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > > > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > > > static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > > > static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > > > static inline void protect_kernel_text_data(void) {};
> > > > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > > > #endif
> > > >
> > > > int set_direct_map_invalid_noflush(struct page *page);
> > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > index 4176a2affd1d..b8a35ef0eab0 100644
> > > > --- a/arch/riscv/kernel/setup.c
> > > > +++ b/arch/riscv/kernel/setup.c
> > > > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> > > >
> > > > void free_initmem(void)
> > > > {
> > > > + unsigned long init_begin = (unsigned long)__init_begin;
> > > > + unsigned long init_end = (unsigned long)__init_end;
> > > > +
> > > > + set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > > > free_initmem_default(POISON_FREE_INITMEM);
> > > > init_mem_is_free = true;
> > > > }
> > > > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > > > index 0807633f0dc8..15b9882588ae 100644
> > > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > > @@ -30,8 +30,8 @@ SECTIONS
> > > > . = ALIGN(PAGE_SIZE);
> > > >
> > > > __init_begin = .;
> > > > + __init_text_begin = .;
> > > > INIT_TEXT_SECTION(PAGE_SIZE)
> > > > - INIT_DATA_SECTION(16)
> > > > . = ALIGN(8);
> > > > __soc_early_init_table : {
> > > > __soc_early_init_table_start = .;
> > > > @@ -48,11 +48,19 @@ SECTIONS
> > > > {
> > > > EXIT_TEXT
> > > > }
> > > > +
> > > > + __init_text_end = .;
> > > > + . = ALIGN(SECTION_ALIGN);
> > > > + /* Start of init data section */
> > > > + __init_data_begin = .;
> > > > + INIT_DATA_SECTION(16)
> > > > .exit.data :
> > > > {
> > > > EXIT_DATA
> > > > }
> > > > PERCPU_SECTION(L1_CACHE_BYTES)
> > > > +
> > > > + __init_data_end = .;
> > > > __init_end = .;
> > > >
> > > > . = ALIGN(SECTION_ALIGN);
> > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > > > --- a/arch/riscv/mm/init.c
> > > > +++ b/arch/riscv/mm/init.c
> > > > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > > > {
> > > > unsigned long text_start = (unsigned long)_text;
> > > > unsigned long text_end = (unsigned long)_etext;
> > > > + unsigned long init_text_start = (unsigned long)__init_text_begin;
> > > > + unsigned long init_text_end = (unsigned long)__init_text_end;
> > > > + unsigned long init_data_start = (unsigned long)__init_data_begin;
> > > > + unsigned long init_data_end = (unsigned long)__init_data_end;
> > > > unsigned long rodata_start = (unsigned long)__start_rodata;
> > > > unsigned long data_start = (unsigned long)_data;
> > > > unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> > > >
> > > > + set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > > > set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > > > + set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > > > set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > > > set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > > > }
> > > > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > > > index 19fecb362d81..aecedaf086ab 100644
> > > > --- a/arch/riscv/mm/pageattr.c
> > > > +++ b/arch/riscv/mm/pageattr.c
> > > > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > > > return ret;
> > > > }
> > > >
> > > > +int set_memory_default(unsigned long addr, int numpages)
> > > > +{
> > > > + return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > > > + __pgprot(0));
> > > > +}
> > > > +
> > > > int set_memory_ro(unsigned long addr, int numpages)
> > > > {
> > > > return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> > >
> > >
> > > Hi Atish,
> > >
> > > I tested this patchset and CONFIG_DEBUG_WX=y
> > >
> >
> > Thanks for testing the patch.
> >
> > > [ 2.664012] Freeing unused kernel memory: 114420K
> > > [ 2.666081] ------------[ cut here ]------------
> > > [ 2.666779] riscv/mm: Found insecure W+X mapping at address
> > > (____ptrval____)/0xffffffe000000000
> > > [ 2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> > > note_page+0xc2/0x238
> > > [ 2.669147] Modules linked in:
> > > [ 2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > 5.9.0-rc8-00033-gfacf070a80ea #153
> > > [ 2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> > > [ 2.671054] gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> > > ffffffe007e861d0
> > > [ 2.671683] t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> > > ffffffe0fba9bc60
> > > [ 2.672499] s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> > > 0000000000000020
> > > [ 2.673119] a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> > > d4328dc070ccb100
> > > [ 2.673729] a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> > > ffffffe007851e98
> > > [ 2.674333] s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> > > 0000000087200000
> > > [ 2.674963] s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> > > ffffffe007e7ac00
> > > [ 2.675564] s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> > > ffffffe000000000
> > > [ 2.676392] s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> > > 000000000001da48
> > > [ 2.676992] t5 : ffffffe007e74490 t6 : ffffffe007e81432
> > > [ 2.677449] status: 0000000000000120 badaddr: 0000000000000000
> > > cause: 0000000000000003
> > > [ 2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> > > [ 2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> > > [ 2.679737] Run /init as init process
> > >
> >
> > This would be triggered for the current kernel as well as we don't
> > protect the permission for __init section at all. As you correctly
> > pointed out, __init & .head sections are mapped with same permissions
> > because of 2MB mappings.
> >
> > Currently, the entire head.text & init section have RWX permission.
> > This patch is trying remove the write permission from .init.text &
> > .head.text and execute permission from .init.data until kernel is
> > booted.
> > It doesn't provide full protection but better than the current scheme.
> >
> > To remove the write permission of .head.txt only, we have to keep
> > .head.txt & .init.text section in separate sections. The linear
> > mapping would look like this in that case.
> > There are no issues as such but kernel size would increase by another 2M.
> >
> > ---[ Linear mapping ]---
> > 0xffffffe000000000-0xffffffe000200000 0x0000000080200000 2M
> > PMD D A . . X . R V
> > 0xffffffe000200000-0xffffffe000600000 0x0000000080400000 4M
> > PMD D A . . . W R V
> > 0xffffffe000600000-0xffffffe000e00000 0x0000000080800000 8M
> > PMD D A . . X . R V
> > 0xffffffe000e00000-0xffffffe001400000 0x0000000081000000 6M
> > PMD D A . . . . R V
> > 0xffffffe001400000-0xffffffe03fe00000 0x0000000081600000 1002M
> > PMD D A . . . W R V
> >
> > The other solution would be move init section below text. Keep text &
> > head in one section.
> > The .init.text & .init.data will be in separate sections after that.
> > Here is the mapping in that case.
> >
> > ---[ Linear mapping ]---
> > 0xffffffe000000000-0xffffffe000800000 0x0000000080200000 8M
> > PMD D A . . X . R V
> > 0xffffffe000800000-0xffffffe000c00000 0x0000000080a00000 4M
> > PMD D A . . . W R V
> > 0xffffffe000c00000-0xffffffe001200000 0x0000000080e00000 6M
> > PMD D A . . . . R V
> > 0xffffffe001200000-0xffffffe03fe00000 0x0000000081400000 1004M
> > PMD D A . . . W R V
> >
> > I prefer the 2nd approach compared to the first one as it saves memory
> > and we can fix lockdep issue without adding arch_is_kernel_data
> > to sections.h (Guo's patch).
> >
> > However, the 2nd approach throws this problem if
> > CONFIG_HARDENED_USERCOPY is enabled.
> >
> > net/ipv4/ipconfig.o: in function `ic_setup_routes':
> > /home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
> > relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
> > defined in .text section in net/ipv4/fib_frontend.o
> > make: *** [Makefile:1162: vmlinux] Error 1
> >
> > I am currently looking into this to understand why R_RISCV_JAL is
> > generated a generic function invocation
> > where auipc + jalr should have been used.
> >
>
> I checked the relocation is is correct 00000000000001d0 R_RISCV_CALL
> ip_rt_ioctl
> The assembly from objdump also shows a pair of auipc + jalr.
>
> 1d0: 00000097 auipc ra,0x0
> 1d4: 000080e7 jalr ra # 1d0 <.LBE348+0x4>
>
> I don't understand why the toolchain is complaining about relocation
> error only when
> CONFIG_HARDENED_USERCOPY is enabled.
>
> Are we seeing some subtle toolchain bug ?
>
"relocation truncated to fit" usually means that the jump address is
too far to jump.
Maybe trying "-mno-relax -Wl,--no-relax" to disable linker relax or
checking if somewhere just using jal instead of call?
On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <[email protected]> wrote:
>
> On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> > This happens only when copy_from_user is called from function that is
> > annotated with __init.
> > Adding Kito & Jim for their input
> >
> > @kito, @Jim: Please let me know if I should create a issue in
> > riscv-gnu-toolchain repo or somewhere else.
>
> I can't do anything useful without a testcase that I can use to
> reproduce the problem. The interactions here are complex, so pointing
> at lines of code or kernel config options doesn't give me any useful
> info.
>
> Relaxation can convert calls to a jal. I don't know of any open bugs
> in this area that can generate relocation errors. if it is a
> relaxation error then turning off relaxation should work around the
> problem as you suggested.
>
> A kernel build problem is serious. I think this is worth a bug
> report. FSF binutils or riscv-gnu-toolchain is fine.
>
I have created an issue with detailed descriptions and reproduction steps.
Please let me know if you need anything else.
> Jim
--
Regards,
Atish
On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> This happens only when copy_from_user is called from function that is
> annotated with __init.
> Adding Kito & Jim for their input
>
> @kito, @Jim: Please let me know if I should create a issue in
> riscv-gnu-toolchain repo or somewhere else.
I can't do anything useful without a testcase that I can use to
reproduce the problem. The interactions here are complex, so pointing
at lines of code or kernel config options doesn't give me any useful
info.
Relaxation can convert calls to a jal. I don't know of any open bugs
in this area that can generate relocation errors. if it is a
relaxation error then turning off relaxation should work around the
problem as you suggested.
A kernel build problem is serious. I think this is worth a bug
report. FSF binutils or riscv-gnu-toolchain is fine.
Jim
On Mon, Oct 12, 2020 at 8:08 PM Greentime Hu <[email protected]> wrote:
>
> Atish Patra <[email protected]> 於 2020年10月13日 週二 上午9:28寫道:
> >
> > On Mon, Oct 12, 2020 at 4:26 PM Atish Patra <[email protected]> wrote:
> > >
> > > On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <[email protected]> wrote:
> > > >
> > > > Atish Patra <[email protected]> 於 2020年10月10日 週六 上午5:13寫道:
> > > > >
> > > > > Currently, .init.text & .init.data are intermixed which makes it impossible
> > > > > apply different permissions to them. .init.data shouldn't need exec
> > > > > permissions while .init.text shouldn't have write permission.
> > > > >
> > > > > Keep them in separate sections so that different permissions are applied to
> > > > > each section. This improves the kernel protection under
> > > > > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > > > > entire _init section after it is freed so that those pages can be used for
> > > > > other purpose.
> > > > >
> > > > > Signed-off-by: Atish Patra <[email protected]>
> > > > > ---
> > > > > arch/riscv/include/asm/sections.h | 2 ++
> > > > > arch/riscv/include/asm/set_memory.h | 2 ++
> > > > > arch/riscv/kernel/setup.c | 4 ++++
> > > > > arch/riscv/kernel/vmlinux.lds.S | 10 +++++++++-
> > > > > arch/riscv/mm/init.c | 6 ++++++
> > > > > arch/riscv/mm/pageattr.c | 6 ++++++
> > > > > 6 files changed, 29 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > > > > index d60802bfafbc..730d2c4a844d 100644
> > > > > --- a/arch/riscv/include/asm/sections.h
> > > > > +++ b/arch/riscv/include/asm/sections.h
> > > > > @@ -10,6 +10,8 @@
> > > > > #include <asm-generic/sections.h>
> > > > > extern char _start[];
> > > > > extern char _start_kernel[];
> > > > > +extern char __init_data_begin[], __init_data_end[];
> > > > > +extern char __init_text_begin[], __init_text_end[];
> > > > >
> > > > > extern bool init_mem_is_free;
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > > > > index 4cc3a4e2afd3..913429c9c1ae 100644
> > > > > --- a/arch/riscv/include/asm/set_memory.h
> > > > > +++ b/arch/riscv/include/asm/set_memory.h
> > > > > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > > > > int set_memory_rw(unsigned long addr, int numpages);
> > > > > int set_memory_x(unsigned long addr, int numpages);
> > > > > int set_memory_nx(unsigned long addr, int numpages);
> > > > > +int set_memory_default(unsigned long addr, int numpages);
> > > > > void protect_kernel_text_data(void);
> > > > > #else
> > > > > static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > > > > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > > > > static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > > > > static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > > > > static inline void protect_kernel_text_data(void) {};
> > > > > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > > > > #endif
> > > > >
> > > > > int set_direct_map_invalid_noflush(struct page *page);
> > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > index 4176a2affd1d..b8a35ef0eab0 100644
> > > > > --- a/arch/riscv/kernel/setup.c
> > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> > > > >
> > > > > void free_initmem(void)
> > > > > {
> > > > > + unsigned long init_begin = (unsigned long)__init_begin;
> > > > > + unsigned long init_end = (unsigned long)__init_end;
> > > > > +
> > > > > + set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > > > > free_initmem_default(POISON_FREE_INITMEM);
> > > > > init_mem_is_free = true;
> > > > > }
> > > > > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > > > > index 0807633f0dc8..15b9882588ae 100644
> > > > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > > > @@ -30,8 +30,8 @@ SECTIONS
> > > > > . = ALIGN(PAGE_SIZE);
> > > > >
> > > > > __init_begin = .;
> > > > > + __init_text_begin = .;
> > > > > INIT_TEXT_SECTION(PAGE_SIZE)
> > > > > - INIT_DATA_SECTION(16)
> > > > > . = ALIGN(8);
> > > > > __soc_early_init_table : {
> > > > > __soc_early_init_table_start = .;
> > > > > @@ -48,11 +48,19 @@ SECTIONS
> > > > > {
> > > > > EXIT_TEXT
> > > > > }
> > > > > +
> > > > > + __init_text_end = .;
> > > > > + . = ALIGN(SECTION_ALIGN);
> > > > > + /* Start of init data section */
> > > > > + __init_data_begin = .;
> > > > > + INIT_DATA_SECTION(16)
> > > > > .exit.data :
> > > > > {
> > > > > EXIT_DATA
> > > > > }
> > > > > PERCPU_SECTION(L1_CACHE_BYTES)
> > > > > +
> > > > > + __init_data_end = .;
> > > > > __init_end = .;
> > > > >
> > > > > . = ALIGN(SECTION_ALIGN);
> > > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > > > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > > > > --- a/arch/riscv/mm/init.c
> > > > > +++ b/arch/riscv/mm/init.c
> > > > > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > > > > {
> > > > > unsigned long text_start = (unsigned long)_text;
> > > > > unsigned long text_end = (unsigned long)_etext;
> > > > > + unsigned long init_text_start = (unsigned long)__init_text_begin;
> > > > > + unsigned long init_text_end = (unsigned long)__init_text_end;
> > > > > + unsigned long init_data_start = (unsigned long)__init_data_begin;
> > > > > + unsigned long init_data_end = (unsigned long)__init_data_end;
> > > > > unsigned long rodata_start = (unsigned long)__start_rodata;
> > > > > unsigned long data_start = (unsigned long)_data;
> > > > > unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> > > > >
> > > > > + set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > > > > set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > > > > + set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > > > > set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > > > > set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > > > > }
> > > > > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > > > > index 19fecb362d81..aecedaf086ab 100644
> > > > > --- a/arch/riscv/mm/pageattr.c
> > > > > +++ b/arch/riscv/mm/pageattr.c
> > > > > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > > > > return ret;
> > > > > }
> > > > >
> > > > > +int set_memory_default(unsigned long addr, int numpages)
> > > > > +{
> > > > > + return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > > > > + __pgprot(0));
> > > > > +}
> > > > > +
> > > > > int set_memory_ro(unsigned long addr, int numpages)
> > > > > {
> > > > > return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> > > >
> > > >
> > > > Hi Atish,
> > > >
> > > > I tested this patchset and CONFIG_DEBUG_WX=y
> > > >
> > >
> > > Thanks for testing the patch.
> > >
> > > > [ 2.664012] Freeing unused kernel memory: 114420K
> > > > [ 2.666081] ------------[ cut here ]------------
> > > > [ 2.666779] riscv/mm: Found insecure W+X mapping at address
> > > > (____ptrval____)/0xffffffe000000000
> > > > [ 2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> > > > note_page+0xc2/0x238
> > > > [ 2.669147] Modules linked in:
> > > > [ 2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > > 5.9.0-rc8-00033-gfacf070a80ea #153
> > > > [ 2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> > > > [ 2.671054] gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> > > > ffffffe007e861d0
> > > > [ 2.671683] t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> > > > ffffffe0fba9bc60
> > > > [ 2.672499] s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> > > > 0000000000000020
> > > > [ 2.673119] a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> > > > d4328dc070ccb100
> > > > [ 2.673729] a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> > > > ffffffe007851e98
> > > > [ 2.674333] s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> > > > 0000000087200000
> > > > [ 2.674963] s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> > > > ffffffe007e7ac00
> > > > [ 2.675564] s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> > > > ffffffe000000000
> > > > [ 2.676392] s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> > > > 000000000001da48
> > > > [ 2.676992] t5 : ffffffe007e74490 t6 : ffffffe007e81432
> > > > [ 2.677449] status: 0000000000000120 badaddr: 0000000000000000
> > > > cause: 0000000000000003
> > > > [ 2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> > > > [ 2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> > > > [ 2.679737] Run /init as init process
> > > >
> > >
> > > This would be triggered for the current kernel as well as we don't
> > > protect the permission for __init section at all. As you correctly
> > > pointed out, __init & .head sections are mapped with same permissions
> > > because of 2MB mappings.
> > >
> > > Currently, the entire head.text & init section have RWX permission.
> > > This patch is trying remove the write permission from .init.text &
> > > .head.text and execute permission from .init.data until kernel is
> > > booted.
> > > It doesn't provide full protection but better than the current scheme.
> > >
> > > To remove the write permission of .head.txt only, we have to keep
> > > .head.txt & .init.text section in separate sections. The linear
> > > mapping would look like this in that case.
> > > There are no issues as such but kernel size would increase by another 2M.
> > >
> > > ---[ Linear mapping ]---
> > > 0xffffffe000000000-0xffffffe000200000 0x0000000080200000 2M
> > > PMD D A . . X . R V
> > > 0xffffffe000200000-0xffffffe000600000 0x0000000080400000 4M
> > > PMD D A . . . W R V
> > > 0xffffffe000600000-0xffffffe000e00000 0x0000000080800000 8M
> > > PMD D A . . X . R V
> > > 0xffffffe000e00000-0xffffffe001400000 0x0000000081000000 6M
> > > PMD D A . . . . R V
> > > 0xffffffe001400000-0xffffffe03fe00000 0x0000000081600000 1002M
> > > PMD D A . . . W R V
> > >
> > > The other solution would be move init section below text. Keep text &
> > > head in one section.
> > > The .init.text & .init.data will be in separate sections after that.
> > > Here is the mapping in that case.
> > >
> > > ---[ Linear mapping ]---
> > > 0xffffffe000000000-0xffffffe000800000 0x0000000080200000 8M
> > > PMD D A . . X . R V
> > > 0xffffffe000800000-0xffffffe000c00000 0x0000000080a00000 4M
> > > PMD D A . . . W R V
> > > 0xffffffe000c00000-0xffffffe001200000 0x0000000080e00000 6M
> > > PMD D A . . . . R V
> > > 0xffffffe001200000-0xffffffe03fe00000 0x0000000081400000 1004M
> > > PMD D A . . . W R V
> > >
> > > I prefer the 2nd approach compared to the first one as it saves memory
> > > and we can fix lockdep issue without adding arch_is_kernel_data
> > > to sections.h (Guo's patch).
> > >
> > > However, the 2nd approach throws this problem if
> > > CONFIG_HARDENED_USERCOPY is enabled.
> > >
> > > net/ipv4/ipconfig.o: in function `ic_setup_routes':
> > > /home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
> > > relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
> > > defined in .text section in net/ipv4/fib_frontend.o
> > > make: *** [Makefile:1162: vmlinux] Error 1
> > >
> > > I am currently looking into this to understand why R_RISCV_JAL is
> > > generated a generic function invocation
> > > where auipc + jalr should have been used.
> > >
> >
> > I checked the relocation is is correct 00000000000001d0 R_RISCV_CALL
> > ip_rt_ioctl
> > The assembly from objdump also shows a pair of auipc + jalr.
> >
> > 1d0: 00000097 auipc ra,0x0
> > 1d4: 000080e7 jalr ra # 1d0 <.LBE348+0x4>
> >
> > I don't understand why the toolchain is complaining about relocation
> > error only when
> > CONFIG_HARDENED_USERCOPY is enabled.
> >
> > Are we seeing some subtle toolchain bug ?
> >
> "relocation truncated to fit" usually means that the jump address is
> too far to jump.
> Maybe trying "-mno-relax -Wl,--no-relax" to disable linker relax or
> checking if somewhere just using jal instead of call?
Yes. As I said the relocation id seems correct from the objdump.
I narrowed down the issue to a __builtin_constant_p
Here is the code path:
ip_rt_ioctl->rtentry_to_fib_config->copy_from_user->check_copy_size->check_object_size
#ifdef CONFIG_HARDENED_USERCOPY
112 extern void __check_object_size(const void *ptr, unsigned long n,
113 bool to_user);
114
115 static __always_inline void check_object_size(const void *ptr,
unsigned long n,
116 bool to_user)
117 {
118 if (!__builtin_constant_p(n))
---------------------> Commenting this line avoids the linker issue
119
__check_object_size(ptr, n, to_user);
120 }
121 #else
122 static inline void check_object_size(const void *ptr, unsigned
long n,
123 bool to_user)
124 { }
125 #endif /* CONFIG_HARDENED_USERCOPY */
This happens only when copy_from_user is called from function that is
annotated with __init.
Adding Kito & Jim for their input
@kito, @Jim: Please let me know if I should create a issue in
riscv-gnu-toolchain repo or somewhere else.
--
Regards,
Atish
On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <[email protected]> wrote:
>
> On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <[email protected]> wrote:
> >
> > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> > > This happens only when copy_from_user is called from function that is
> > > annotated with __init.
> > > Adding Kito & Jim for their input
> > >
> > > @kito, @Jim: Please let me know if I should create a issue in
> > > riscv-gnu-toolchain repo or somewhere else.
> >
> > I can't do anything useful without a testcase that I can use to
> > reproduce the problem. The interactions here are complex, so pointing
> > at lines of code or kernel config options doesn't give me any useful
> > info.
> >
> > Relaxation can convert calls to a jal. I don't know of any open bugs
> > in this area that can generate relocation errors. if it is a
> > relaxation error then turning off relaxation should work around the
> > problem as you suggested.
> >
> > A kernel build problem is serious. I think this is worth a bug
> > report. FSF binutils or riscv-gnu-toolchain is fine.
> >
>
> I have created an issue with detailed descriptions and reproduction steps.
> Please let me know if you need anything else.
>
It may be a toolchain issue. Here is the ongoing discussion in case
anybody else is interested.
https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > Jim
>
>
>
> --
> Regards,
> Atish
--
Regards,
Atish
On Thu, Oct 22, 2020 at 7:01 AM Atish Patra <[email protected]> wrote:
>
> On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <[email protected]> wrote:
> >
> > On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <[email protected]> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <[email protected]> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> > > > > This happens only when copy_from_user is called from function that is
> > > > > annotated with __init.
> > > > > Adding Kito & Jim for their input
> > > > >
> > > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > > riscv-gnu-toolchain repo or somewhere else.
> > > >
> > > > I can't do anything useful without a testcase that I can use to
> > > > reproduce the problem. The interactions here are complex, so pointing
> > > > at lines of code or kernel config options doesn't give me any useful
> > > > info.
> > > >
> > > > Relaxation can convert calls to a jal. I don't know of any open bugs
> > > > in this area that can generate relocation errors. if it is a
> > > > relaxation error then turning off relaxation should work around the
> > > > problem as you suggested.
> > > >
> > > > A kernel build problem is serious. I think this is worth a bug
> > > > report. FSF binutils or riscv-gnu-toolchain is fine.
> > > >
> > >
> > > I have created an issue with detailed descriptions and reproduction steps.
> > > Please let me know if you need anything else.
> > >
> >
> > It may be a toolchain issue. Here is the ongoing discussion in case
> > anybody else is interested.
> >
> > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> >
> > > > Jim
> > >
> > >
> > >
> > > --
> > > Regards,
> > > Atish
> >
> >
> >
> > --
> > Regards,
> > Atish
>
> Thanks to Jim, we know the cause now. Jim has provided an excellent
> analysis of the issue in the github issue report.
> https://github.com/riscv/riscv-gnu-toolchain/issues/738
>
> To summarize, the linker relaxation code is not aware of the
> alignments between sections.
> That's why it relaxes the calls from .text to .init.text and converts
> a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
>
> There are few ways we can solve this problem.
>
> 1. As per Jim's suggestion, linker relaxation code is aware of the
> section alignments. We can mark .init.text as a 2MB aligned section.
> For calls within a section, section's alignment will be used in the
> calculation. For calls across sections, e.g. from .init.text to .text,
> the maximum
> section alignment of every section will be used. Thus, all
> relaxation within .init.text and between any sections will be
> impacted.
> Thus, it avoids the error but results in the following increase in
> size of various sections.
> section change in size (in bytes)
> .head.text +4
> .text +40
> .init.text. +6530
> .exit.text +84
>
> The only significant increase is .init.text but it is freed after
> boot. Thus, I don't see any significant performance degradation due to
> that.
>
> Here is the diff
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -51,7 +51,13 @@ SECTIONS
> . = ALIGN(SECTION_ALIGN);
> __init_begin = .;
> __init_text_begin = .;
> - INIT_TEXT_SECTION(PAGE_SIZE)
> + . = ALIGN(PAGE_SIZE); \
> + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
> ALIGN(SECTION_ALIGN) { \
> + _sinittext = .; \
> + INIT_TEXT \
> + _einittext = .; \
> + }
> +
> . = ALIGN(8);
> __soc_early_init_table : {
> __soc_early_init_table_start = .;
>
> 2. We will continue to keep head.txt & .init.text together before
> .text. However, we will map the pages that contain head & init.text at
> page
> granularity so that .head.text and init.text can have different
> permissions. I have not measured the performance impact of this but it
> won't
> too bad given that the combined size of sections .head.txt &
> .init.text is 200K. So we are talking about page level permission only
> for
> ~50 pages during boot.
>
> 3. Keep head.text in a separate 2MB aligned section. .init.text will
> follow .head.text in its own section as well. This increases the
> kernel
> size by 2MB for MMU kernels. For nommu case, it will only increase
> by 64 bytes due to smaller section alignment for nommu kernels.
>
> Both solutions 1 & 2 come at minimal performance on boot time while
> solution 3 comes at increased kernel size.
>
> Any preference ?
I prefer solution#3 because:
1. This will help us avoid special handling of static objects
2. This will make RISC-V linker script more aligned with other
major architectures
I don't think solution#3 will increase the size of the kernel by 2MB. We
can make head.text part of text section. There will be only one section
alignment between text section and init section but the existing section
alignment between init section and text section will be removed. In other
words, number of section alignments will remain same.
Regards,
Anup
On Thu, Oct 22, 2020 at 10:33 AM Anup Patel <[email protected]> wrote:
>
> On Thu, Oct 22, 2020 at 7:01 AM Atish Patra <[email protected]> wrote:
> >
> > On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <[email protected]> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <[email protected]> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <[email protected]> wrote:
> > > > >
> > > > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> > > > > > This happens only when copy_from_user is called from function that is
> > > > > > annotated with __init.
> > > > > > Adding Kito & Jim for their input
> > > > > >
> > > > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > > > riscv-gnu-toolchain repo or somewhere else.
> > > > >
> > > > > I can't do anything useful without a testcase that I can use to
> > > > > reproduce the problem. The interactions here are complex, so pointing
> > > > > at lines of code or kernel config options doesn't give me any useful
> > > > > info.
> > > > >
> > > > > Relaxation can convert calls to a jal. I don't know of any open bugs
> > > > > in this area that can generate relocation errors. if it is a
> > > > > relaxation error then turning off relaxation should work around the
> > > > > problem as you suggested.
> > > > >
> > > > > A kernel build problem is serious. I think this is worth a bug
> > > > > report. FSF binutils or riscv-gnu-toolchain is fine.
> > > > >
> > > >
> > > > I have created an issue with detailed descriptions and reproduction steps.
> > > > Please let me know if you need anything else.
> > > >
> > >
> > > It may be a toolchain issue. Here is the ongoing discussion in case
> > > anybody else is interested.
> > >
> > > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > >
> > > > > Jim
> > > >
> > > >
> > > >
> > > > --
> > > > Regards,
> > > > Atish
> > >
> > >
> > >
> > > --
> > > Regards,
> > > Atish
> >
> > Thanks to Jim, we know the cause now. Jim has provided an excellent
> > analysis of the issue in the github issue report.
> > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> >
> > To summarize, the linker relaxation code is not aware of the
> > alignments between sections.
> > That's why it relaxes the calls from .text to .init.text and converts
> > a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
> >
> > There are few ways we can solve this problem.
> >
> > 1. As per Jim's suggestion, linker relaxation code is aware of the
> > section alignments. We can mark .init.text as a 2MB aligned section.
> > For calls within a section, section's alignment will be used in the
> > calculation. For calls across sections, e.g. from .init.text to .text,
> > the maximum
> > section alignment of every section will be used. Thus, all
> > relaxation within .init.text and between any sections will be
> > impacted.
> > Thus, it avoids the error but results in the following increase in
> > size of various sections.
> > section change in size (in bytes)
> > .head.text +4
> > .text +40
> > .init.text. +6530
> > .exit.text +84
> >
> > The only significant increase is .init.text but it is freed after
> > boot. Thus, I don't see any significant performance degradation due to
> > that.
> >
> > Here is the diff
> > --- a/arch/riscv/kernel/vmlinux.lds.S
> > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > @@ -51,7 +51,13 @@ SECTIONS
> > . = ALIGN(SECTION_ALIGN);
> > __init_begin = .;
> > __init_text_begin = .;
> > - INIT_TEXT_SECTION(PAGE_SIZE)
> > + . = ALIGN(PAGE_SIZE); \
> > + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
> > ALIGN(SECTION_ALIGN) { \
> > + _sinittext = .; \
> > + INIT_TEXT \
> > + _einittext = .; \
> > + }
> > +
> > . = ALIGN(8);
> > __soc_early_init_table : {
> > __soc_early_init_table_start = .;
> >
> > 2. We will continue to keep head.txt & .init.text together before
> > .text. However, we will map the pages that contain head & init.text at
> > page
> > granularity so that .head.text and init.text can have different
> > permissions. I have not measured the performance impact of this but it
> > won't
> > too bad given that the combined size of sections .head.txt &
> > .init.text is 200K. So we are talking about page level permission only
> > for
> > ~50 pages during boot.
> >
> > 3. Keep head.text in a separate 2MB aligned section. .init.text will
> > follow .head.text in its own section as well. This increases the
> > kernel
> > size by 2MB for MMU kernels. For nommu case, it will only increase
> > by 64 bytes due to smaller section alignment for nommu kernels.
> >
> > Both solutions 1 & 2 come at minimal performance on boot time while
> > solution 3 comes at increased kernel size.
> >
> > Any preference ?
>
> I prefer solution#3 because:
> 1. This will help us avoid special handling of static objects
> 2. This will make RISC-V linker script more aligned with other
> major architectures
>
> I don't think solution#3 will increase the size of the kernel by 2MB. We
> can make head.text part of text section. There will be only one section
> alignment between text section and init section but the existing section
> alignment between init section and text section will be removed. In other
> words, number of section alignments will remain same.
I think we will need to incorporate Jim's suggestion irrespective of the
solution we choose because without Jim's changes we can hit the
linker relaxation issue in solution#2 as well.
Regards,
Anup
On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <[email protected]> wrote:
>
> On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <[email protected]> wrote:
> >
> > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <[email protected]> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> > > > This happens only when copy_from_user is called from function that is
> > > > annotated with __init.
> > > > Adding Kito & Jim for their input
> > > >
> > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > riscv-gnu-toolchain repo or somewhere else.
> > >
> > > I can't do anything useful without a testcase that I can use to
> > > reproduce the problem. The interactions here are complex, so pointing
> > > at lines of code or kernel config options doesn't give me any useful
> > > info.
> > >
> > > Relaxation can convert calls to a jal. I don't know of any open bugs
> > > in this area that can generate relocation errors. if it is a
> > > relaxation error then turning off relaxation should work around the
> > > problem as you suggested.
> > >
> > > A kernel build problem is serious. I think this is worth a bug
> > > report. FSF binutils or riscv-gnu-toolchain is fine.
> > >
> >
> > I have created an issue with detailed descriptions and reproduction steps.
> > Please let me know if you need anything else.
> >
>
> It may be a toolchain issue. Here is the ongoing discussion in case
> anybody else is interested.
>
> https://github.com/riscv/riscv-gnu-toolchain/issues/738
>
> > > Jim
> >
> >
> >
> > --
> > Regards,
> > Atish
>
>
>
> --
> Regards,
> Atish
Thanks to Jim, we know the cause now. Jim has provided an excellent
analysis of the issue in the github issue report.
https://github.com/riscv/riscv-gnu-toolchain/issues/738
To summarize, the linker relaxation code is not aware of the
alignments between sections.
That's why it relaxes the calls from .text to .init.text and converts
a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
There are few ways we can solve this problem.
1. As per Jim's suggestion, linker relaxation code is aware of the
section alignments. We can mark .init.text as a 2MB aligned section.
For calls within a section, section's alignment will be used in the
calculation. For calls across sections, e.g. from .init.text to .text,
the maximum
section alignment of every section will be used. Thus, all
relaxation within .init.text and between any sections will be
impacted.
Thus, it avoids the error but results in the following increase in
size of various sections.
section change in size (in bytes)
.head.text +4
.text +40
.init.text. +6530
.exit.text +84
The only significant increase is .init.text but it is freed after
boot. Thus, I don't see any significant performance degradation due to
that.
Here is the diff
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -51,7 +51,13 @@ SECTIONS
. = ALIGN(SECTION_ALIGN);
__init_begin = .;
__init_text_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE); \
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
ALIGN(SECTION_ALIGN) { \
+ _sinittext = .; \
+ INIT_TEXT \
+ _einittext = .; \
+ }
+
. = ALIGN(8);
__soc_early_init_table : {
__soc_early_init_table_start = .;
2. We will continue to keep head.txt & .init.text together before
.text. However, we will map the pages that contain head & init.text at
page
granularity so that .head.text and init.text can have different
permissions. I have not measured the performance impact of this but it
won't
too bad given that the combined size of sections .head.txt &
.init.text is 200K. So we are talking about page level permission only
for
~50 pages during boot.
3. Keep head.text in a separate 2MB aligned section. .init.text will
follow .head.text in its own section as well. This increases the
kernel
size by 2MB for MMU kernels. For nommu case, it will only increase
by 64 bytes due to smaller section alignment for nommu kernels.
Both solutions 1 & 2 come at minimal performance on boot time while
solution 3 comes at increased kernel size.
Any preference ?
--
Regards,
Atish
On Thu, Oct 22, 2020 at 12:22 AM Anup Patel <[email protected]> wrote:
>
> On Thu, Oct 22, 2020 at 10:33 AM Anup Patel <[email protected]> wrote:
> >
> > On Thu, Oct 22, 2020 at 7:01 AM Atish Patra <[email protected]> wrote:
> > >
> > > On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <[email protected]> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <[email protected]> wrote:
> > > > >
> > > > > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <[email protected]> wrote:
> > > > > >
> > > > > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <[email protected]> wrote:
> > > > > > > This happens only when copy_from_user is called from function that is
> > > > > > > annotated with __init.
> > > > > > > Adding Kito & Jim for their input
> > > > > > >
> > > > > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > > > > riscv-gnu-toolchain repo or somewhere else.
> > > > > >
> > > > > > I can't do anything useful without a testcase that I can use to
> > > > > > reproduce the problem. The interactions here are complex, so pointing
> > > > > > at lines of code or kernel config options doesn't give me any useful
> > > > > > info.
> > > > > >
> > > > > > Relaxation can convert calls to a jal. I don't know of any open bugs
> > > > > > in this area that can generate relocation errors. if it is a
> > > > > > relaxation error then turning off relaxation should work around the
> > > > > > problem as you suggested.
> > > > > >
> > > > > > A kernel build problem is serious. I think this is worth a bug
> > > > > > report. FSF binutils or riscv-gnu-toolchain is fine.
> > > > > >
> > > > >
> > > > > I have created an issue with detailed descriptions and reproduction steps.
> > > > > Please let me know if you need anything else.
> > > > >
> > > >
> > > > It may be a toolchain issue. Here is the ongoing discussion in case
> > > > anybody else is interested.
> > > >
> > > > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > > >
> > > > > > Jim
> > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Regards,
> > > > > Atish
> > > >
> > > >
> > > >
> > > > --
> > > > Regards,
> > > > Atish
> > >
> > > Thanks to Jim, we know the cause now. Jim has provided an excellent
> > > analysis of the issue in the github issue report.
> > > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > >
> > > To summarize, the linker relaxation code is not aware of the
> > > alignments between sections.
> > > That's why it relaxes the calls from .text to .init.text and converts
> > > a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
> > >
> > > There are few ways we can solve this problem.
> > >
> > > 1. As per Jim's suggestion, linker relaxation code is aware of the
> > > section alignments. We can mark .init.text as a 2MB aligned section.
> > > For calls within a section, section's alignment will be used in the
> > > calculation. For calls across sections, e.g. from .init.text to .text,
> > > the maximum
> > > section alignment of every section will be used. Thus, all
> > > relaxation within .init.text and between any sections will be
> > > impacted.
> > > Thus, it avoids the error but results in the following increase in
> > > size of various sections.
> > > section change in size (in bytes)
> > > .head.text +4
> > > .text +40
> > > .init.text. +6530
> > > .exit.text +84
> > >
> > > The only significant increase is .init.text but it is freed after
> > > boot. Thus, I don't see any significant performance degradation due to
> > > that.
> > >
> > > Here is the diff
> > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > @@ -51,7 +51,13 @@ SECTIONS
> > > . = ALIGN(SECTION_ALIGN);
> > > __init_begin = .;
> > > __init_text_begin = .;
> > > - INIT_TEXT_SECTION(PAGE_SIZE)
> > > + . = ALIGN(PAGE_SIZE); \
> > > + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
> > > ALIGN(SECTION_ALIGN) { \
> > > + _sinittext = .; \
> > > + INIT_TEXT \
> > > + _einittext = .; \
> > > + }
> > > +
> > > . = ALIGN(8);
> > > __soc_early_init_table : {
> > > __soc_early_init_table_start = .;
> > >
> > > 2. We will continue to keep head.txt & .init.text together before
> > > .text. However, we will map the pages that contain head & init.text at
> > > page
> > > granularity so that .head.text and init.text can have different
> > > permissions. I have not measured the performance impact of this but it
> > > won't
> > > too bad given that the combined size of sections .head.txt &
> > > .init.text is 200K. So we are talking about page level permission only
> > > for
> > > ~50 pages during boot.
> > >
> > > 3. Keep head.text in a separate 2MB aligned section. .init.text will
> > > follow .head.text in its own section as well. This increases the
> > > kernel
> > > size by 2MB for MMU kernels. For nommu case, it will only increase
> > > by 64 bytes due to smaller section alignment for nommu kernels.
> > >
> > > Both solutions 1 & 2 come at minimal performance on boot time while
> > > solution 3 comes at increased kernel size.
> > >
> > > Any preference ?
> >
> > I prefer solution#3 because:
> > 1. This will help us avoid special handling of static objects
> > 2. This will make RISC-V linker script more aligned with other
> > major architectures
> >
> > I don't think solution#3 will increase the size of the kernel by 2MB. We
> > can make head.text part of text section. There will be only one section
> > alignment between text section and init section but the existing section
> > alignment between init section and text section will be removed. In other
> > words, number of section alignments will remain same.
>
> I think we will need to incorporate Jim's suggestion irrespective of the
> solution we choose because without Jim's changes we can hit the
> linker relaxation issue in solution#2 as well.
>
Reconsidering all the possible approaches, I think you are right.
With approach 2 & 3, I just hit the issue now. However, there is an
alignment between
sections which may throw the same linker error in future when text or
init.text gets bigger.
I will follow approach 1 suggested by Jim to revise the series.
> Regards,
> Anup
--
Regards,
Atish