On book3s32 (hash), exec protection is set per 256MB segment via the NX bit.
Instead of clearing the NX bit on the whole vmalloc space when CONFIG_MODULES
is selected, allocate modules in a dedicated segment (0xb0000000-0xbfffffff
by default). This keeps exec protection on vmalloc space while still
allowing exec on modules.
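For illustration, a minimal sketch of how the dedicated segment bounds could
be derived. This is an assumption, not the patch itself (the actual
definition lands in patch 6/6); it assumes PAGE_OFFSET is 0xc0000000, which
yields the 0xb0000000-0xbfffffff default mentioned above:

#ifdef CONFIG_STRICT_KERNEL_RWX
/* Illustrative only: one full 256MB segment just below the kernel. */
#define MODULES_END	ALIGN_DOWN(PAGE_OFFSET, SZ_256M)	/* 0xc0000000 */
#define MODULES_VADDR	(MODULES_END - SZ_256M)			/* 0xb0000000 */
#endif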
v2:
- Removed the two patches that fix ptdump. They will be submitted independently.
- Only changing the user/kernel boundary for PPC32 now.
- Reordered the patches inside the series.
Christophe Leroy (6):
powerpc/lib: Prepare code-patching for modules allocated outside
vmalloc space
powerpc: Use MODULES_VADDR if defined
powerpc/32s: Only leave NX unset on segments used for modules
powerpc/32: Set user/kernel boundary at TASK_SIZE instead of
PAGE_OFFSET
powerpc/32s: Kernel space starts at TASK_SIZE
powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/book3s/32/pgtable.h | 15 +++++----------
arch/powerpc/include/asm/page.h | 4 +++-
arch/powerpc/kernel/head_32.S | 12 ++++++------
arch/powerpc/kernel/module.c | 11 +++++++++++
arch/powerpc/lib/code-patching.c | 2 +-
arch/powerpc/mm/book3s32/hash_low.S | 2 +-
arch/powerpc/mm/book3s32/mmu.c | 17 ++++++++++++++---
arch/powerpc/mm/kasan/kasan_init_32.c | 6 ++++++
arch/powerpc/mm/ptdump/ptdump.c | 16 ++++++++++++++--
10 files changed, 62 insertions(+), 24 deletions(-)
--
2.25.0
Instead of leaving NX unset on all segments above the start
of vmalloc space, only leave NX unset on segments used for
modules.
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/mm/book3s32/mmu.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 03b6ba54460e..c0162911f6cb 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -187,6 +187,17 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 	return __mmu_mapin_ram(border, top);
 }
 
+static bool is_module_segment(unsigned long addr)
+{
+	if (!IS_ENABLED(CONFIG_MODULES))
+		return false;
+	if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
+		return false;
+	if (addr >= ALIGN(VMALLOC_END, SZ_256M))
+		return false;
+	return true;
+}
+
 void mmu_mark_initmem_nx(void)
 {
 	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -223,9 +234,9 @@ void mmu_mark_initmem_nx(void)
 
 	for (i = TASK_SIZE >> 28; i < 16; i++) {
 		/* Do not set NX on VM space for modules */
-		if (IS_ENABLED(CONFIG_MODULES) &&
-		    (VMALLOC_START & 0xf0000000) == i << 28)
-			break;
+		if (is_module_segment(i << 28))
+			continue;
+
 		mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
 	}
 }
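For intuition, a userspace sketch of the range check above, using
hypothetical values (not from the patch): VMALLOC_START = 0xe1000000 and
VMALLOC_END = 0xf0000000. uint32_t stands in for the 32-bit unsigned long;
each 256MB segment i covers i << 28, and 0x10000000 is the segment
register's N (no-execute) bit that mtsrin() sets:

#include <stdint.h>
#include <stdio.h>

#define SZ_256M			0x10000000u
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

static int is_module_segment(uint32_t addr, uint32_t start, uint32_t end)
{
	if (addr < ALIGN_DOWN(start, SZ_256M))
		return 0;
	if (addr >= ALIGN(end, SZ_256M))
		return 0;
	return 1;
}

int main(void)
{
	/* With these values, only the 0xe0000000 segment keeps NX unset. */
	printf("%d\n", is_module_segment(0xe0000000u, 0xe1000000u, 0xf0000000u)); /* 1 */
	printf("%d\n", is_module_segment(0xf0000000u, 0xe1000000u, 0xf0000000u)); /* 0 */
	return 0;
}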
--
2.25.0
User space stops at TASK_SIZE. At the moment, kernel space starts
at PAGE_OFFSET.
In order to use space between TASK_SIZE and PAGE_OFFSET for modules,
make TASK_SIZE the limit between user and kernel space.
Note that fault.c already considers TASK_SIZE as the boundary between
user and kernel space.
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/page.h | 4 +++-
arch/powerpc/mm/ptdump/ptdump.c | 8 ++++++--
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index a63fe6f3a0ff..254687258f42 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -255,8 +255,10 @@ static inline bool pfn_valid(unsigned long pfn)
  */
 #ifdef CONFIG_PPC_BOOK3E_64
 #define is_kernel_addr(x)	((x) >= 0x8000000000000000ul)
-#else
+#elif defined(CONFIG_PPC_BOOK3S_64)
 #define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
+#else
+#define is_kernel_addr(x)	((x) >= TASK_SIZE)
 #endif
 
 #ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index de6e05ef871c..9d942136c7be 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -348,7 +348,11 @@ static void populate_markers(void)
 {
 	int i = 0;
 
+#ifdef CONFIG_PPC64
 	address_markers[i++].start_address = PAGE_OFFSET;
+#else
+	address_markers[i++].start_address = TASK_SIZE;
+#endif
 	address_markers[i++].start_address = VMALLOC_START;
 	address_markers[i++].start_address = VMALLOC_END;
 #ifdef CONFIG_PPC64
@@ -385,7 +389,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	struct pg_state st = {
 		.seq = m,
 		.marker = address_markers,
-		.start_address = PAGE_OFFSET,
+		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
 
 #ifdef CONFIG_PPC64
@@ -429,7 +433,7 @@ void ptdump_check_wx(void)
 		.seq = NULL,
 		.marker = address_markers,
 		.check_wx = true,
-		.start_address = PAGE_OFFSET,
+		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
 
 #ifdef CONFIG_PPC64
--
2.25.0
On Mon, 29 Jun 2020 11:15:19 +0000 (UTC), Christophe Leroy wrote:
> On book3s32 (hash), exec protection is set per 256MB segment via the NX bit.
> Instead of clearing the NX bit on the whole vmalloc space when CONFIG_MODULES
> is selected, allocate modules in a dedicated segment (0xb0000000-0xbfffffff
> by default). This keeps exec protection on vmalloc space while still
> allowing exec on modules.
>
> v2:
> - Removed the two patches that fix ptdump. They will be submitted independently.
> - Only changing the user/kernel boundary for PPC32 now.
> - Reordered the patches inside the series.
>
> [...]
Applied to powerpc/next.
[1/6] powerpc/lib: Prepare code-patching for modules allocated outside vmalloc space
https://git.kernel.org/powerpc/c/ccc8fcf72a6953fbfd6998999d622295f522b952
[2/6] powerpc: Use MODULES_VADDR if defined
https://git.kernel.org/powerpc/c/7fbc22ce29931630da200cfc90fe5a454f54a794
[3/6] powerpc/32s: Only leave NX unset on segments used for modules
https://git.kernel.org/powerpc/c/c496433197154144c310a17939736bc5c155914d
[4/6] powerpc/32: Set user/kernel boundary at TASK_SIZE instead of PAGE_OFFSET
https://git.kernel.org/powerpc/c/b6be1bb7f7216b9e9f33f57abe6e3290c0e66bd4
[5/6] powerpc/32s: Kernel space starts at TASK_SIZE
https://git.kernel.org/powerpc/c/f1a1f7a15eb0e13b84791ff2738b84e414501718
[6/6] powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX
https://git.kernel.org/powerpc/c/6ca055322da8fe25ff9ac50db6f3b7b59b6f961c
cheers
On Jun 29 2020, Christophe Leroy wrote:
> Instead of leaving NX unset on all segments above the start
> of vmalloc space, only leave NX unset on segments used for
> modules.
I'm getting this crash:
kernel tried to execute exec-protected page (f294b000) - exploit attempt (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xf294b000
Oops: Kernel access of bad area, sig: 11 [#1]
BE PAGE_SIZE=4K MMU=Hash PowerMac
Modules linked in: pata_macio(+)
CPU: 0 PID: 87 Comm: udevd Not tainted 5.8.0-rc2-test #49
NIP: f294b000 LR: c0005c60 CTR: f294b000
REGS: f18d9cc0 TRAP: 0400 Not tainted (5.8.0-rc2-test)
MSR: 10009032 <E,ME,IR,DR,RI> CR: 84222422 XER: 20000000
GPR00: c0005c14 f18d9d78 ef30ca20 00000000 ef0000e0 c00993d0 ef6da038 0000005e
GPR08: c09050b8 c08b0000 00000000 f18d9d78 44222422 10072070 00000000 0fefaca4
GPR16: 1006a00c f294d50b 00000120 00000124 c0096ea8 0000000e ef2776c0 ef2776e4
GPR24: f18fd6e8 00000001 c086fe64 c086fe04 00000000 c08b0000 f294b000 ffffffff
NIP [f294b000] pata_macio_init+0x0/0xc0 [pata_macio]
LR [c0005c60] do_one_initcall+0x6c/0x160
Call Trace:
[f18d9d78] [c0005c14] do_one_initcall+0x20/0x160 (unreliable)
[f18d9dd8] [c009a22c] do_init_module+0x60/0x1c0
[f18d9df8] [c00993d8] load_module+0x16a8/0x1c14
[f18d9ea8] [c0099aa4] sys_finit_module+0x8c/0x94
[f18d9f38] [c0012174] ret_from_syscall+0x0/0x34
--- interrupt: c01 at 0xfdb4318
LR = 0xfeee9c0
Instruction dump:
XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX <3d20c08b> 3d40c086 9421ffe0 8129106c
---[ end trace 85a98cc836109871 ]---
Andreas.
--
Andreas Schwab, [email protected]
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
On 21/08/2020 00:00, Andreas Schwab wrote:
> On Jun 29 2020, Christophe Leroy wrote:
>
>> Instead of leaving NX unset on all segments above the start
>> of vmalloc space, only leave NX unset on segments used for
>> modules.
>
> I'm getting this crash:
>
> kernel tried to execute exec-protected page (f294b000) - exploit attempt (uid: 0)
> BUG: Unable to handle kernel instruction fetch
> Faulting instruction address: 0xf294b000
> [...]
>
Please try the patch at
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/07884ed033c31e074747b7eb8eaa329d15db07ec.1596641219.git.christophe.leroy@csgroup.eu/
And if you are using KAsan, also take
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/6eddca2d5611fd57312a88eae31278c87a8fc99d.1596641224.git.christophe.leroy@csgroup.eu/
Although I have some doubt that it will fix it, because the faulting
instruction address, 0xf294b000, is within the vmalloc area.
In the likely case the patch doesn't fix the issue, can you provide your
.config and a dump of /sys/kernel/debug/powerpc/segment_registers (you
need CONFIG_PPC_PTDUMP enabled for that), and also the part of the boot
log shown below.
[ 0.000000] Memory: 509556K/524288K available (7088K kernel code,
592K rwdata, 1304K rodata, 356K init, 803K bss, 14732K reserved, 0K
cma-reserved)
[ 0.000000] Kernel virtual memory layout:
[ 0.000000] * 0xff7ff000..0xfffff000 : fixmap
[ 0.000000] * 0xff7fd000..0xff7ff000 : early ioremap
[ 0.000000] * 0xe1000000..0xff7fd000 : vmalloc & ioremap
Thanks
Christophe
On 08/21/2020 05:11 AM, Christophe Leroy wrote:
>
>
> On 21/08/2020 00:00, Andreas Schwab wrote:
>> On Jun 29 2020, Christophe Leroy wrote:
>>
>>> Instead of leaving NX unset on all segments above the start
>>> of vmalloc space, only leave NX unset on segments used for
>>> modules.
>>
>> I'm getting this crash:
>>
>> kernel tried to execute exec-protected page (f294b000) - exploit
>> attempt (uid: 0)
>> [...]
>>
>
> Please try the patch at
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/07884ed033c31e074747b7eb8eaa329d15db07ec.1596641219.git.christophe.leroy@csgroup.eu/
>
> [...]
>
I found the issue: when VMALLOC_END is above 0xf0000000,
ALIGN(VMALLOC_END, SZ_256M) wraps to 0, so the "addr >= ALIGN(VMALLOC_END,
SZ_256M)" check is always true and is_module_segment() always returns
false. The change below should fix it.
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 82ae9e06a773..d426eaf76bb0 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -194,12 +194,12 @@ static bool is_module_segment(unsigned long addr)
 #ifdef MODULES_VADDR
 	if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
 		return false;
-	if (addr >= ALIGN(MODULES_END, SZ_256M))
+	if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
 		return false;
 #else
 	if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
 		return false;
-	if (addr >= ALIGN(VMALLOC_END, SZ_256M))
+	if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1)
 		return false;
 #endif
 	return true;
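For completeness, a small userspace sketch of the wrap-around. The value
0xff7fd000 is taken from the vmalloc range in the boot log quoted earlier;
uint32_t mimics the 32-bit unsigned long:

#include <stdint.h>
#include <stdio.h>

#define SZ_256M		0x10000000u
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	uint32_t vmalloc_end = 0xff7fd000u;

	/* 0xff7fd000 + 0x0fffffff overflows 32 bits, so ALIGN() yields 0
	 * and "addr >= 0" is always true: every segment gets NX set. */
	printf("ALIGN     = 0x%08x\n", (unsigned)ALIGN(vmalloc_end, SZ_256M));

	/* With the fix, the bound is ALIGN() - 1 = 0xffffffff, so
	 * "addr > 0xffffffff" is never true and module segments survive. */
	printf("ALIGN - 1 = 0x%08x\n", (unsigned)(ALIGN(vmalloc_end, SZ_256M) - 1));
	return 0;
}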
Christophe