2005-03-08 23:14:46

by Jake Moilanen

[permalink] [raw]
Subject: [PATCH 0/2] No-exec support for ppc64

These patches add no-execute support to PPC64. They prohibit executing
code on the stack, and on most other non-text segments, for both user
space and the kernel.

No execute is supported on Power4 processors and up. These processors
support pages that have a no-execute permission bit.

The patches include a base fixup from Anton Blanchard. This includes a
fix for the wrong bit being used for no-exec and for read/write on the
hardware PTEs.

Distros that compile with PT_GNU_STACK depend on Ben Herrenschmidt's
vDSO patches for the signal trampoline. Without them, the application
will hang on the first signal, because the return code is placed on the
signal context stack in order to return to the kernel when the signal
handler completes. The changes should be in the latest BK tree.

The patch is broken into two parts:

1/2: PPC64 no-exec support for user space: This will prohibit user
space apps from executing in segments not marked as executable. The
base support is in here as well.

2/2: PPC64 no-exec support for kernel space: This prohibits the kernel
from executing non-text code.

Thanks,
Jake


2005-03-08 23:21:13

by Jake Moilanen

[permalink] [raw]
Subject: [PATCH 1/2] No-exec support for ppc64

No-exec base and user space support for PPC64.

This will prohibit user space apps that are compiled with PT_GNU_STACK
from executing in segments that are non-executable. Apps compiled
without PT_GNU_STACK will work as well, but will not be able to take
advantage of the no-exec feature.

Signed-off-by: Jake Moilanen <[email protected]>

---

linux-2.6-bk-moilanen/arch/ppc64/kernel/head.S | 5 +
linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_htab.c | 4 +
linux-2.6-bk-moilanen/arch/ppc64/kernel/pSeries_lpar.c | 2
linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c | 14 +++--
linux-2.6-bk-moilanen/arch/ppc64/mm/hash_low.S | 12 ++--
linux-2.6-bk-moilanen/arch/ppc64/mm/hugetlbpage.c | 13 ++++
linux-2.6-bk-moilanen/fs/binfmt_elf.c | 2
linux-2.6-bk-moilanen/include/asm-ppc64/elf.h | 7 ++
linux-2.6-bk-moilanen/include/asm-ppc64/page.h | 19 ++++++-
linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h | 45 +++++++++--------
10 files changed, 87 insertions(+), 36 deletions(-)

diff -puN arch/ppc64/kernel/head.S~nx-user-ppc64 arch/ppc64/kernel/head.S
--- linux-2.6-bk/arch/ppc64/kernel/head.S~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/head.S 2005-03-08 16:08:54 -06:00
@@ -36,6 +36,7 @@
#include <asm/offsets.h>
#include <asm/bug.h>
#include <asm/cputable.h>
+#include <asm/pgtable.h>
#include <asm/setup.h>

#ifdef CONFIG_PPC_ISERIES
@@ -950,11 +951,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
* accessing a userspace segment (even from the kernel). We assume
* kernel addresses always have the high bit set.
*/
- rlwinm r4,r4,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
orc r0,r12,r0 /* MSR_PR | ~high_bit */
rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
- ori r4,r4,1 /* add _PAGE_PRESENT */
+ rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */

/*
* On iSeries, we soft-disable interrupts here, then
diff -puN arch/ppc64/kernel/iSeries_htab.c~nx-user-ppc64 arch/ppc64/kernel/iSeries_htab.c
--- linux-2.6-bk/arch/ppc64/kernel/iSeries_htab.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_htab.c 2005-03-08 16:08:54 -06:00
@@ -144,6 +144,10 @@ static long iSeries_hpte_updatepp(unsign

HvCallHpt_get(&hpte, slot);
if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+ /*
+ * Hypervisor expects bits as NPPP, which is
+ * different from how they are mapped in our PP.
+ */
HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
iSeries_hunlock(slot);
return 0;
diff -puN arch/ppc64/kernel/pSeries_lpar.c~nx-user-ppc64 arch/ppc64/kernel/pSeries_lpar.c
--- linux-2.6-bk/arch/ppc64/kernel/pSeries_lpar.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/pSeries_lpar.c 2005-03-08 16:08:54 -06:00
@@ -470,7 +470,7 @@ static void pSeries_lpar_hpte_updatebolt
slot = pSeries_lpar_hpte_find(vpn);
BUG_ON(slot == -1);

- flags = newpp & 3;
+ flags = newpp & 7;
lpar_rc = plpar_pte_protect(flags, slot, 0);

BUG_ON(lpar_rc != H_Success);
diff -puN arch/ppc64/mm/fault.c~nx-user-ppc64 arch/ppc64/mm/fault.c
--- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-08 16:08:54 -06:00
@@ -93,6 +93,7 @@ int do_page_fault(struct pt_regs *regs,
unsigned long code = SEGV_MAPERR;
unsigned long is_write = error_code & 0x02000000;
unsigned long trap = TRAP(regs);
+ unsigned long is_exec = trap == 0x400;

BUG_ON((trap == 0x380) || (trap == 0x480));

@@ -199,16 +200,19 @@ int do_page_fault(struct pt_regs *regs,
good_area:
code = SEGV_ACCERR;

+ if (is_exec) {
+ /* protection fault */
+ if (error_code & 0x08000000)
+ goto bad_area;
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
/* a write */
- if (is_write) {
+ } else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ if (!(vma->vm_flags & VM_READ))
goto bad_area;
}

diff -puN arch/ppc64/mm/hash_low.S~nx-user-ppc64 arch/ppc64/mm/hash_low.S
--- linux-2.6-bk/arch/ppc64/mm/hash_low.S~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_low.S 2005-03-08 16:08:54 -06:00
@@ -89,7 +89,7 @@ _GLOBAL(__hash_page)
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
- rlwinm r30,r4,5,24,24 /* _PAGE_RW -> _PAGE_DIRTY */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
/* Write the linux PTE atomically (setting busy) */
@@ -112,11 +112,11 @@ _GLOBAL(__hash_page)
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
xor r28,r5,r0
-
- /* Convert linux PTE bits into HW equivalents
- */
- andi. r3,r30,0x1fa /* Get basic set of flags */
- rlwinm r0,r30,32-2+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
andc r0,r30,r0 /* r0 = pte & ~r0 */
diff -puN arch/ppc64/mm/hugetlbpage.c~nx-user-ppc64 arch/ppc64/mm/hugetlbpage.c
--- linux-2.6-bk/arch/ppc64/mm/hugetlbpage.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hugetlbpage.c 2005-03-08 16:08:54 -06:00
@@ -786,6 +786,7 @@ int hash_huge_page(struct mm_struct *mm,
pte_t old_pte, new_pte;
unsigned long hpteflags, prpn;
long slot;
+ int is_exec;
int err = 1;

spin_lock(&mm->page_table_lock);
@@ -796,6 +797,10 @@ int hash_huge_page(struct mm_struct *mm,
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> HPAGE_SHIFT;

+ is_exec = access & _PAGE_EXEC;
+ if (unlikely(is_exec && !(pte_val(*ptep) & _PAGE_EXEC)))
+ goto out;
+
/*
* If no pte found or not present, send the problem up to
* do_page_fault
@@ -828,7 +833,12 @@ int hash_huge_page(struct mm_struct *mm,
old_pte = *ptep;
new_pte = old_pte;

- hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ hpteflags = (pte_val(new_pte) & _PAGE_RW) |
+ (!(pte_val(new_pte) & _PAGE_RW)) |
+ _PAGE_USER;
+
+ /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
+ hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);

/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
@@ -898,6 +908,7 @@ repeat:
err = 0;

out:
+
spin_unlock(&mm->page_table_lock);

return err;
diff -puN fs/binfmt_elf.c~nx-user-ppc64 fs/binfmt_elf.c
--- linux-2.6-bk/fs/binfmt_elf.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/fs/binfmt_elf.c 2005-03-08 16:08:54 -06:00
@@ -99,6 +99,8 @@ static int set_brk(unsigned long start,
up_write(&current->mm->mmap_sem);
if (BAD_ADDR(addr))
return addr;
+
+ sys_mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC);
}
current->mm->start_brk = current->mm->brk = end;
return 0;
diff -puN include/asm-ppc64/elf.h~nx-user-ppc64 include/asm-ppc64/elf.h
--- linux-2.6-bk/include/asm-ppc64/elf.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/elf.h 2005-03-08 16:08:54 -06:00
@@ -226,6 +226,13 @@ do { \
else if (current->personality != PER_LINUX32) \
set_personality(PER_LINUX); \
} while (0)
+
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ */
+#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
+
#endif

/*
diff -puN include/asm-ppc64/page.h~nx-user-ppc64 include/asm-ppc64/page.h
--- linux-2.6-bk/include/asm-ppc64/page.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/page.h 2005-03-08 16:08:54 -06:00
@@ -235,8 +235,25 @@ extern u64 ppc64_pft_size; /* Log 2 of

#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+#define VM_STACK_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)

#endif /* __KERNEL__ */
#endif /* _PPC64_PAGE_H */
diff -puN include/asm-ppc64/pgtable.h~nx-user-ppc64 include/asm-ppc64/pgtable.h
--- linux-2.6-bk/include/asm-ppc64/pgtable.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h 2005-03-08 16:08:54 -06:00
@@ -82,14 +82,14 @@
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
-#define _PAGE_RW 0x0004 /* software: user write access allowed */
+#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
+#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
@@ -100,7 +100,7 @@
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
+#define _PAGE_CHG_MASK (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | _PAGE_WRITETHRU | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_HPTEFLAGS | PAGE_MASK)

#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)

@@ -116,31 +116,38 @@
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
-#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
- _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
+
+#define HW_NO_EXEC _PAGE_EXEC /* This is used when the bit is
+ * inverted, even though it's the
+ * same value, hopefully it will be
+ * clearer in the code what is
+ * going on. */

/*
- * The PowerPC can only do execute protection on a segment (256MB) basis,
- * not on a page basis. So we consider execute permission the same as read.
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
* Also, write permissions imply read permissions.
* This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_X
+#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY_X
-#define __P100 PAGE_READONLY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY
+#define __P110 PAGE_COPY_X
#define __P111 PAGE_COPY_X

#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_X
+#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED_X
-#define __S100 PAGE_READONLY
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED
+#define __S110 PAGE_SHARED_X
#define __S111 PAGE_SHARED_X

#ifndef __ASSEMBLY__
@@ -197,7 +204,8 @@ void hugetlb_mm_free_pgd(struct mm_struc
})

#define pte_modify(_pte, newprot) \
- (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)))
+ (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | \
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK)))

#define pte_none(pte) ((pte_val(pte) & ~_PAGE_HPTEFLAGS) == 0)
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
@@ -266,9 +274,6 @@ static inline int pte_young(pte_t pte) {
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;}

-static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
-static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
-
static inline pte_t pte_rdprotect(pte_t pte) {
pte_val(pte) &= ~_PAGE_USER; return pte; }
static inline pte_t pte_exprotect(pte_t pte) {
@@ -438,7 +443,7 @@ static inline void set_pte_at(struct mm_
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
{
unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long old, tmp;

__asm__ __volatile__(

_

2005-03-08 23:27:21

by Jake Moilanen

[permalink] [raw]
Subject: [PATCH 2/2] No-exec support for ppc64

No-exec support for the kernel on PPC64.

This will mark all non-text kernel pages as no-execute.

Signed-off-by: Jake Moilanen <[email protected]>

---

linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c | 7 +++
linux-2.6-bk-moilanen/arch/ppc64/kernel/module.c | 3 +
linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c | 25 ++++++++++++
linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c | 31 ++++++++++++----
linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h | 1
5 files changed, 59 insertions(+), 8 deletions(-)

diff -puN arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 arch/ppc64/kernel/iSeries_setup.c
--- linux-2.6-bk/arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c 2005-03-08 16:08:57 -06:00
@@ -624,6 +624,7 @@ static void __init iSeries_bolt_kernel(u
{
unsigned long pa;
unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
+ unsigned long tmp_mode;
HPTE hpte;

for (pa = saddr; pa < eaddr ;pa += PAGE_SIZE) {
@@ -632,6 +633,12 @@ static void __init iSeries_bolt_kernel(u
unsigned long va = (vsid << 28) | (pa & 0xfffffff);
unsigned long vpn = va >> PAGE_SHIFT;
unsigned long slot = HvCallHpt_findValid(&hpte, vpn);
+
+ tmp_mode = mode_rw;
+
+ /* Make non-kernel text non-executable */
+ if (!is_kernel_text(ea))
+ tmp_mode = mode_rw | HW_NO_EXEC;

if (hpte.dw0.dw0.v) {
/* HPTE exists, so just bolt it */
diff -puN arch/ppc64/kernel/module.c~nx-kernel-ppc64 arch/ppc64/kernel/module.c
--- linux-2.6-bk/arch/ppc64/kernel/module.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/module.c 2005-03-08 16:08:57 -06:00
@@ -102,7 +102,8 @@ void *module_alloc(unsigned long size)
{
if (size == 0)
return NULL;
- return vmalloc(size);
+
+ return vmalloc_exec(size);
}

/* Free memory returned from module_alloc */
diff -puN arch/ppc64/mm/fault.c~nx-kernel-ppc64 arch/ppc64/mm/fault.c
--- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-08 16:08:57 -06:00
@@ -76,6 +76,21 @@ static int store_updates_sp(struct pt_re
return 0;
}

+pte_t *lookup_address(unsigned long address)
+{
+ pgd_t *pgd = pgd_offset_k(address);
+ pmd_t *pmd;
+
+ if (pgd_none(*pgd))
+ return NULL;
+
+ pmd = pmd_offset(pgd, address);
+ if (pmd_none(*pmd))
+ return NULL;
+
+ return pte_offset_kernel(pmd, address);
+}
+
/*
* The error_code parameter is
* - DSISR for a non-SLB data access fault,
@@ -94,6 +109,7 @@ int do_page_fault(struct pt_regs *regs,
unsigned long is_write = error_code & 0x02000000;
unsigned long trap = TRAP(regs);
unsigned long is_exec = trap == 0x400;
+ pte_t *ptep;

BUG_ON((trap == 0x380) || (trap == 0x480));

@@ -253,6 +269,15 @@ bad_area_nosemaphore:
info.si_addr = (void __user *) address;
force_sig_info(SIGSEGV, &info, current);
return 0;
+ }
+
+ ptep = lookup_address(address);
+
+ if (ptep && pte_present(*ptep) && !pte_exec(*ptep)) {
+ if (printk_ratelimit())
+ printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+ show_stack(current, (unsigned long *)__get_SP());
+ do_exit(SIGKILL);
}

return SIGSEGV;
diff -puN arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 arch/ppc64/mm/hash_utils.c
--- linux-2.6-bk/arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c 2005-03-08 16:08:57 -06:00
@@ -51,6 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/abs_addr.h>
+#include <asm/sections.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -89,12 +90,23 @@ static inline void loop_forever(void)
;
}

+int is_kernel_text(unsigned long addr)
+{
+ if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
+ return 1;
+
+ return 0;
+}
+
+
+
#ifdef CONFIG_PPC_MULTIPLATFORM
static inline void create_pte_mapping(unsigned long start, unsigned long end,
unsigned long mode, int large)
{
unsigned long addr;
unsigned int step;
+ unsigned long tmp_mode;

if (large)
step = 16*MB;
@@ -112,6 +124,13 @@ static inline void create_pte_mapping(un
else
vpn = va >> PAGE_SHIFT;

+
+ tmp_mode = mode;
+
+ /* Make non-kernel text non-executable */
+ if (!is_kernel_text(addr))
+ tmp_mode = mode | HW_NO_EXEC;
+
hash = hpt_hash(vpn, large);

hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -120,12 +139,12 @@ static inline void create_pte_mapping(un
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);
else
#endif /* CONFIG_PPC_PSERIES */
ret = native_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);

if (ret == -1) {
ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -238,8 +257,6 @@ unsigned int hash_page_do_lazy_icache(un
{
struct page *page;

-#define PPC64_HWNOEXEC (1 << 2)
-
if (!pfn_valid(pte_pfn(pte)))
return pp;

@@ -250,8 +267,8 @@ unsigned int hash_page_do_lazy_icache(un
if (trap == 0x400) {
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
- } else
- pp |= PPC64_HWNOEXEC;
+ } else
+ pp |= HW_NO_EXEC;
}
return pp;
}
@@ -271,7 +288,7 @@ int hash_page(unsigned long ea, unsigned
int user_region = 0;
int local = 0;
cpumask_t tmp;
-
+
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
diff -puN include/asm-ppc64/pgtable.h~nx-kernel-ppc64 include/asm-ppc64/pgtable.h
--- linux-2.6-bk/include/asm-ppc64/pgtable.h~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h 2005-03-08 16:08:57 -06:00
@@ -116,6 +116,7 @@
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)

#define HW_NO_EXEC _PAGE_EXEC /* This is used when the bit is
* inverted, even though it's the

_

2005-03-09 03:06:49

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2/2] No-exec support for ppc64

On Tue, 2005-03-08 at 17:13 -0600, Jake Moilanen wrote:

> diff -puN arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 arch/ppc64/kernel/iSeries_setup.c
> --- linux-2.6-bk/arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
> +++ linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c 2005-03-08 16:08:57 -06:00
> @@ -624,6 +624,7 @@ static void __init iSeries_bolt_kernel(u
> {
> unsigned long pa;
> unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
> + unsigned long tmp_mode;
> HPTE hpte;
>
> for (pa = saddr; pa < eaddr ;pa += PAGE_SIZE) {
> @@ -632,6 +633,12 @@ static void __init iSeries_bolt_kernel(u
> unsigned long va = (vsid << 28) | (pa & 0xfffffff);
> unsigned long vpn = va >> PAGE_SHIFT;
> unsigned long slot = HvCallHpt_findValid(&hpte, vpn);
> +
> + tmp_mode = mode_rw;
> +
> + /* Make non-kernel text non-executable */
> + if (!is_kernel_text(ea))
> + tmp_mode = mode_rw | HW_NO_EXEC;
>
> if (hpte.dw0.dw0.v) {
> /* HPTE exists, so just bolt it */

tmp_mode doesn't seem to be ever used here ...

> /* Free memory returned from module_alloc */
> diff -puN arch/ppc64/mm/fault.c~nx-kernel-ppc64 arch/ppc64/mm/fault.c
> --- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
> +++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-08 16:08:57 -06:00
> @@ -76,6 +76,21 @@ static int store_updates_sp(struct pt_re
> return 0;
> }
>
> +pte_t *lookup_address(unsigned long address)
> +{
> + pgd_t *pgd = pgd_offset_k(address);
> + pmd_t *pmd;
> +
> + if (pgd_none(*pgd))
> + return NULL;
> +
> + pmd = pmd_offset(pgd, address);
> + if (pmd_none(*pmd))
> + return NULL;
> +
> + return pte_offset_kernel(pmd, address);
> +}

Use find_linux_pte() here (asm-ppc64/pgtable.h). It will return NULL of
the PTE is not present too, so no need to dbl check that. That way, I
won't have to fix your copy of the function when I get the proper 4L
headers patch in ;)

> /*
> * The error_code parameter is
> * - DSISR for a non-SLB data access fault,
> @@ -94,6 +109,7 @@ int do_page_fault(struct pt_regs *regs,
> unsigned long is_write = error_code & 0x02000000;
> unsigned long trap = TRAP(regs);
> unsigned long is_exec = trap == 0x400;
> + pte_t *ptep;
>
> BUG_ON((trap == 0x380) || (trap == 0x480));
>
> @@ -253,6 +269,15 @@ bad_area_nosemaphore:
> info.si_addr = (void __user *) address;
> force_sig_info(SIGSEGV, &info, current);
> return 0;
> + }
> +
> + ptep = lookup_address(address);
> +
> + if (ptep && pte_present(*ptep) && !pte_exec(*ptep)) {
> + if (printk_ratelimit())
> + printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
> + show_stack(current, (unsigned long *)__get_SP());
> + do_exit(SIGKILL);
> }

Can you try to limit to 80 columns ? (I know, I'm not the best for that
neither, but I'm trying to cure myself here, I promise my next rewrite
of radeonfb will be fully 80-columns safe :)




2005-03-10 03:30:06

by Olof Johansson

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Tue, Mar 08, 2005 at 05:08:26PM -0600, Jake Moilanen wrote:
> No-exec base and user space support for PPC64.

Hi, a couple of comments below.


-Olof

> @@ -786,6 +786,7 @@ int hash_huge_page(struct mm_struct *mm,
> pte_t old_pte, new_pte;
> unsigned long hpteflags, prpn;
> long slot;
> + int is_exec;
> int err = 1;
>
> spin_lock(&mm->page_table_lock);
> @@ -796,6 +797,10 @@ int hash_huge_page(struct mm_struct *mm,
> va = (vsid << 28) | (ea & 0x0fffffff);
> vpn = va >> HPAGE_SHIFT;
>
> + is_exec = access & _PAGE_EXEC;
> + if (unlikely(is_exec && !(pte_val(*ptep) & _PAGE_EXEC)))
> + goto out;

You only use is_exec this one time, you can probably skip it and just
add the mask in the if statement.

> @@ -898,6 +908,7 @@ repeat:
> err = 0;
>
> out:
> +
> spin_unlock(&mm->page_table_lock);

Whitespace change

> diff -puN include/asm-ppc64/pgtable.h~nx-user-ppc64 include/asm-ppc64/pgtable.h
> --- linux-2.6-bk/include/asm-ppc64/pgtable.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
> +++ linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h 2005-03-08 16:08:54 -06:00
> @@ -82,14 +82,14 @@
> #define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
> #define _PAGE_USER 0x0002 /* matches one of the PP bits */
> #define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
> -#define _PAGE_RW 0x0004 /* software: user write access allowed */
> +#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */

Good to see the comment there, I remember we talked about that earlier.
It can be somewhat confusing. :-)

> #define _PAGE_GUARDED 0x0008
> #define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
> #define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
> #define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
> #define _PAGE_DIRTY 0x0080 /* C: page changed */
> #define _PAGE_ACCESSED 0x0100 /* R: page referenced */
> -#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
> +#define _PAGE_RW 0x0200 /* software: user write access allowed */
> #define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
> #define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
> #define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
> @@ -100,7 +100,7 @@
> /* PAGE_MASK gives the right answer below, but only by accident */
> /* It should be preserving the high 48 bits and then specifically */
> /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
> -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
> +#define _PAGE_CHG_MASK (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | _PAGE_WRITETHRU | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_HPTEFLAGS | PAGE_MASK)

Can you break it into 80 columns with \ ?

2005-03-10 03:30:05

by Olof Johansson

[permalink] [raw]
Subject: Re: [PATCH 2/2] No-exec support for ppc64

Hi,

On Tue, Mar 08, 2005 at 05:13:26PM -0600, Jake Moilanen wrote:
> diff -puN arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 arch/ppc64/mm/hash_utils.c
> --- linux-2.6-bk/arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
> +++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c 2005-03-08 16:08:57 -06:00
> @@ -89,12 +90,23 @@ static inline void loop_forever(void)
> ;
> }
>
> +int is_kernel_text(unsigned long addr)
> +{
> + if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
> + return 1;
> +
> + return 0;
> +}

This is used in two files, but never declared extern in the second file
(iSeries_setup.c). Should it go in a header file as a static inline
instead?

There also seems to be a local static is_kernel_text() in kallsyms that
overlaps (but it's not identical). Removing that redundancy can be taken
care of as a janitorial patch outside of the noexec stuff.



-Olof

2005-03-10 07:20:52

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2/2] No-exec support for ppc64

On Wed, 2005-03-09 at 21:25 -0600, Olof Johansson wrote:
> Hi,
>
> On Tue, Mar 08, 2005 at 05:13:26PM -0600, Jake Moilanen wrote:
> > diff -puN arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 arch/ppc64/mm/hash_utils.c
> > --- linux-2.6-bk/arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
> > +++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c 2005-03-08 16:08:57 -06:00
> > @@ -89,12 +90,23 @@ static inline void loop_forever(void)
> > ;
> > }
> >
> > +int is_kernel_text(unsigned long addr)
> > +{
> > + if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
> > + return 1;
> > +
> > + return 0;
> > +}
>
> This is used in two files, but never declared extern in the second file
> (iSeries_setup.c). Should it go in a header file as a static inline
> instead?

Yes, I think it should.

> There also seems to be a local static is_kernel_text() in kallsyms that
> overlaps (but it's not identical). Removing that redundancy can be taken
> care of as a janitorial patch outside of the noexec stuff.
>
>
>
> -Olof
> _______________________________________________
> Linuxppc64-dev mailing list
> [email protected]
> https://ozlabs.org/cgi-bin/mailman/listinfo/linuxppc64-dev
--
Benjamin Herrenschmidt <[email protected]>

2005-03-10 22:41:17

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Wed, 9 Mar 2005 21:22:13 -0600
[email protected] (Olof Johansson) wrote:

> On Tue, Mar 08, 2005 at 05:08:26PM -0600, Jake Moilanen wrote:
> > No-exec base and user space support for PPC64.
>
> Hi, a couple of comments below.
>

Here's the revised user & base support for no-exec on ppc64 with Olof
and Ben's comments.

Signed-off-by: Jake Moilanen <[email protected]>

---

linux-2.6-bk-moilanen/arch/ppc64/kernel/head.S | 5 +
linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_htab.c | 4 +
linux-2.6-bk-moilanen/arch/ppc64/kernel/pSeries_lpar.c | 2
linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c | 14 +++--
linux-2.6-bk-moilanen/arch/ppc64/mm/hash_low.S | 12 ++--
linux-2.6-bk-moilanen/arch/ppc64/mm/hugetlbpage.c | 10 +++
linux-2.6-bk-moilanen/fs/binfmt_elf.c | 2
linux-2.6-bk-moilanen/include/asm-ppc64/elf.h | 7 ++
linux-2.6-bk-moilanen/include/asm-ppc64/page.h | 19 ++++++-
linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h | 46 +++++++++--------
10 files changed, 85 insertions(+), 36 deletions(-)

diff -puN arch/ppc64/kernel/head.S~nx-user-ppc64 arch/ppc64/kernel/head.S
--- linux-2.6-bk/arch/ppc64/kernel/head.S~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/head.S 2005-03-08 16:08:54 -06:00
@@ -36,6 +36,7 @@
#include <asm/offsets.h>
#include <asm/bug.h>
#include <asm/cputable.h>
+#include <asm/pgtable.h>
#include <asm/setup.h>

#ifdef CONFIG_PPC_ISERIES
@@ -950,11 +951,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
* accessing a userspace segment (even from the kernel). We assume
* kernel addresses always have the high bit set.
*/
- rlwinm r4,r4,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
orc r0,r12,r0 /* MSR_PR | ~high_bit */
rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
- ori r4,r4,1 /* add _PAGE_PRESENT */
+ rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */

/*
* On iSeries, we soft-disable interrupts here, then
diff -puN arch/ppc64/kernel/iSeries_htab.c~nx-user-ppc64 arch/ppc64/kernel/iSeries_htab.c
--- linux-2.6-bk/arch/ppc64/kernel/iSeries_htab.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_htab.c 2005-03-08 16:08:54 -06:00
@@ -144,6 +144,10 @@ static long iSeries_hpte_updatepp(unsign

HvCallHpt_get(&hpte, slot);
if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+ /*
+ * Hypervisor expects bits as NPPP, which is
+ * different from how they are mapped in our PP.
+ */
HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
iSeries_hunlock(slot);
return 0;
diff -puN arch/ppc64/kernel/pSeries_lpar.c~nx-user-ppc64 arch/ppc64/kernel/pSeries_lpar.c
--- linux-2.6-bk/arch/ppc64/kernel/pSeries_lpar.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/pSeries_lpar.c 2005-03-08 16:08:54 -06:00
@@ -470,7 +470,7 @@ static void pSeries_lpar_hpte_updatebolt
slot = pSeries_lpar_hpte_find(vpn);
BUG_ON(slot == -1);

- flags = newpp & 3;
+ flags = newpp & 7;
lpar_rc = plpar_pte_protect(flags, slot, 0);

BUG_ON(lpar_rc != H_Success);
diff -puN arch/ppc64/mm/fault.c~nx-user-ppc64 arch/ppc64/mm/fault.c
--- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-10 16:14:45 -06:00
@@ -93,6 +93,7 @@ int do_page_fault(struct pt_regs *regs,
unsigned long code = SEGV_MAPERR;
unsigned long is_write = error_code & 0x02000000;
unsigned long trap = TRAP(regs);
+ unsigned long is_exec = trap == 0x400;

BUG_ON((trap == 0x380) || (trap == 0x480));

@@ -199,16 +200,19 @@ int do_page_fault(struct pt_regs *regs,
good_area:
code = SEGV_ACCERR;

+ if (is_exec) {
+ /* protection fault */
+ if (error_code & 0x08000000)
+ goto bad_area;
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
/* a write */
- if (is_write) {
+ } else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ if (!(vma->vm_flags & VM_READ))
goto bad_area;
}

diff -puN arch/ppc64/mm/hash_low.S~nx-user-ppc64 arch/ppc64/mm/hash_low.S
--- linux-2.6-bk/arch/ppc64/mm/hash_low.S~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_low.S 2005-03-08 16:08:54 -06:00
@@ -89,7 +89,7 @@ _GLOBAL(__hash_page)
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
- rlwinm r30,r4,5,24,24 /* _PAGE_RW -> _PAGE_DIRTY */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
/* Write the linux PTE atomically (setting busy) */
@@ -112,11 +112,11 @@ _GLOBAL(__hash_page)
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
xor r28,r5,r0
-
- /* Convert linux PTE bits into HW equivalents
- */
- andi. r3,r30,0x1fa /* Get basic set of flags */
- rlwinm r0,r30,32-2+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
andc r0,r30,r0 /* r0 = pte & ~r0 */
diff -puN arch/ppc64/mm/hugetlbpage.c~nx-user-ppc64 arch/ppc64/mm/hugetlbpage.c
--- linux-2.6-bk/arch/ppc64/mm/hugetlbpage.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hugetlbpage.c 2005-03-10 13:46:08 -06:00
@@ -796,6 +796,9 @@ int hash_huge_page(struct mm_struct *mm,
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> HPAGE_SHIFT;

+ if (unlikely((access & _PAGE_EXEC) && !(pte_val(*ptep) & _PAGE_EXEC)))
+ goto out;
+
/*
* If no pte found or not present, send the problem up to
* do_page_fault
@@ -828,7 +831,12 @@ int hash_huge_page(struct mm_struct *mm,
old_pte = *ptep;
new_pte = old_pte;

- hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ hpteflags = (pte_val(new_pte) & _PAGE_RW) |
+ (!(pte_val(new_pte) & _PAGE_RW)) |
+ _PAGE_USER;
+
+ /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
+ hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);

/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
diff -puN fs/binfmt_elf.c~nx-user-ppc64 fs/binfmt_elf.c
--- linux-2.6-bk/fs/binfmt_elf.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/fs/binfmt_elf.c 2005-03-08 16:08:54 -06:00
@@ -99,6 +99,8 @@ static int set_brk(unsigned long start,
up_write(&current->mm->mmap_sem);
if (BAD_ADDR(addr))
return addr;
+
+ sys_mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC);
}
current->mm->start_brk = current->mm->brk = end;
return 0;
diff -puN include/asm-ppc64/elf.h~nx-user-ppc64 include/asm-ppc64/elf.h
--- linux-2.6-bk/include/asm-ppc64/elf.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/elf.h 2005-03-08 16:23:37 -06:00
@@ -226,6 +226,13 @@ do { \
else if (current->personality != PER_LINUX32) \
set_personality(PER_LINUX); \
} while (0)
+
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ */
+#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
+
#endif

/*
diff -puN include/asm-ppc64/page.h~nx-user-ppc64 include/asm-ppc64/page.h
--- linux-2.6-bk/include/asm-ppc64/page.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/page.h 2005-03-08 16:08:54 -06:00
@@ -235,8 +235,25 @@ extern u64 ppc64_pft_size; /* Log 2 of

#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+#define VM_STACK_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)

#endif /* __KERNEL__ */
#endif /* _PPC64_PAGE_H */
diff -puN include/asm-ppc64/pgtable.h~nx-user-ppc64 include/asm-ppc64/pgtable.h
--- linux-2.6-bk/include/asm-ppc64/pgtable.h~nx-user-ppc64 2005-03-08 16:08:54 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h 2005-03-10 16:14:45 -06:00
@@ -82,14 +82,14 @@
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
-#define _PAGE_RW 0x0004 /* software: user write access allowed */
+#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
+#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
@@ -100,7 +100,8 @@
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
+#define _PAGE_CHG_MASK (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | _PAGE_WRITETHRU | \
+ _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_HPTEFLAGS | PAGE_MASK)

#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)

@@ -116,31 +117,38 @@
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
-#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
- _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
+
+#define HW_NO_EXEC _PAGE_EXEC /* This is used when the bit is
+ * inverted, even though it's the
+ * same value, hopefully it will be
+ * clearer in the code what is
+ * going on. */

/*
- * The PowerPC can only do execute protection on a segment (256MB) basis,
- * not on a page basis. So we consider execute permission the same as read.
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
* Also, write permissions imply read permissions.
* This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_X
+#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY_X
-#define __P100 PAGE_READONLY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY
+#define __P110 PAGE_COPY_X
#define __P111 PAGE_COPY_X

#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_X
+#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED_X
-#define __S100 PAGE_READONLY
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED
+#define __S110 PAGE_SHARED_X
#define __S111 PAGE_SHARED_X

#ifndef __ASSEMBLY__
@@ -197,7 +205,8 @@ void hugetlb_mm_free_pgd(struct mm_struc
})

#define pte_modify(_pte, newprot) \
- (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)))
+ (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | \
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK)))

#define pte_none(pte) ((pte_val(pte) & ~_PAGE_HPTEFLAGS) == 0)
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
@@ -266,9 +275,6 @@ static inline int pte_young(pte_t pte) {
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;}

-static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
-static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
-
static inline pte_t pte_rdprotect(pte_t pte) {
pte_val(pte) &= ~_PAGE_USER; return pte; }
static inline pte_t pte_exprotect(pte_t pte) {
@@ -438,7 +444,7 @@ static inline void set_pte_at(struct mm_
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
{
unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long old, tmp;

__asm__ __volatile__(

_

2005-03-10 22:46:23

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 2/2] No-exec support for ppc64

On Thu, 10 Mar 2005 18:15:34 +1100
Benjamin Herrenschmidt <[email protected]> wrote:

> On Wed, 2005-03-09 at 21:25 -0600, Olof Johansson wrote:
> > Hi,
> >
> > On Tue, Mar 08, 2005 at 05:13:26PM -0600, Jake Moilanen wrote:
> > > diff -puN arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 arch/ppc64/mm/hash_utils.c
> > > --- linux-2.6-bk/arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 2005-03-08 16:08:57 -06:00
> > > +++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c 2005-03-08 16:08:57 -06:00
> > > @@ -89,12 +90,23 @@ static inline void loop_forever(void)
> > > ;
> > > }
> > >
> > > +int is_kernel_text(unsigned long addr)
> > > +{
> > > + if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
> > > + return 1;
> > > +
> > > + return 0;
> > > +}
> >
> > This is used in two files, but never declared extern in the second file
> > (iSeries_setup.c). Should it go in a header file as a static inline
> > instead?
>
> Yes, I think it should.
>

Here is the revised no-exec for the kernel on ppc64 w/ Olof and Ben's comments.

Signed-off-by: Jake Moilanen <[email protected]>

---

linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c | 4 +++
linux-2.6-bk-moilanen/arch/ppc64/kernel/module.c | 3 +-
linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c | 19 ++++++++++++++
linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c | 21 ++++++++++------
linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h | 1
linux-2.6-bk-moilanen/include/asm-ppc64/sections.h | 9 ++++++
6 files changed, 49 insertions(+), 8 deletions(-)

diff -puN arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 arch/ppc64/kernel/iSeries_setup.c
--- linux-2.6-bk/arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c 2005-03-10 13:59:12 -06:00
@@ -633,6 +633,10 @@ static void __init iSeries_bolt_kernel(u
unsigned long vpn = va >> PAGE_SHIFT;
unsigned long slot = HvCallHpt_findValid(&hpte, vpn);

+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(ea))
+ mode_rw |= HW_NO_EXEC;
+
if (hpte.dw0.dw0.v) {
/* HPTE exists, so just bolt it */
HvCallHpt_setSwBits(slot, 0x10, 0);
diff -puN arch/ppc64/kernel/module.c~nx-kernel-ppc64 arch/ppc64/kernel/module.c
--- linux-2.6-bk/arch/ppc64/kernel/module.c~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/module.c 2005-03-10 13:54:14 -06:00
@@ -102,7 +102,8 @@ void *module_alloc(unsigned long size)
{
if (size == 0)
return NULL;
- return vmalloc(size);
+
+ return vmalloc_exec(size);
}

/* Free memory returned from module_alloc */
diff -puN arch/ppc64/mm/fault.c~nx-kernel-ppc64 arch/ppc64/mm/fault.c
--- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-10 13:54:14 -06:00
@@ -76,6 +76,13 @@ static int store_updates_sp(struct pt_re
return 0;
}

+pte_t *lookup_address(unsigned long address)
+{
+ pgd_t *pgd = pgd_offset_k(address);
+
+ return find_linux_pte(pgd, address);
+}
+
/*
* The error_code parameter is
* - DSISR for a non-SLB data access fault,
@@ -94,6 +101,7 @@ int do_page_fault(struct pt_regs *regs,
unsigned long is_write = error_code & 0x02000000;
unsigned long trap = TRAP(regs);
unsigned long is_exec = trap == 0x400;
+ pte_t *ptep;

BUG_ON((trap == 0x380) || (trap == 0x480));

@@ -253,6 +261,17 @@ bad_area_nosemaphore:
info.si_addr = (void __user *) address;
force_sig_info(SIGSEGV, &info, current);
return 0;
+ }
+
+ ptep = lookup_address(address);
+
+ if (ptep && pte_present(*ptep) && !pte_exec(*ptep)) {
+ if (printk_ratelimit())
+ printk(KERN_CRIT "kernel tried to execute NX-protected "
+ "page - exploit attempt? (uid: %d)\n",
+ current->uid);
+ show_stack(current, (unsigned long *)__get_SP());
+ do_exit(SIGKILL);
}

return SIGSEGV;
diff -puN arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 arch/ppc64/mm/hash_utils.c
--- linux-2.6-bk/arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c 2005-03-10 13:58:37 -06:00
@@ -51,6 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/abs_addr.h>
+#include <asm/sections.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -95,6 +96,7 @@ static inline void create_pte_mapping(un
{
unsigned long addr;
unsigned int step;
+ unsigned long tmp_mode;

if (large)
step = 16*MB;
@@ -112,6 +114,13 @@ static inline void create_pte_mapping(un
else
vpn = va >> PAGE_SHIFT;

+
+ tmp_mode = mode;
+
+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(addr))
+ tmp_mode = mode | HW_NO_EXEC;
+
hash = hpt_hash(vpn, large);

hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -120,12 +129,12 @@ static inline void create_pte_mapping(un
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);
else
#endif /* CONFIG_PPC_PSERIES */
ret = native_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);

if (ret == -1) {
ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -238,8 +247,6 @@ unsigned int hash_page_do_lazy_icache(un
{
struct page *page;

-#define PPC64_HWNOEXEC (1 << 2)
-
if (!pfn_valid(pte_pfn(pte)))
return pp;

@@ -250,8 +257,8 @@ unsigned int hash_page_do_lazy_icache(un
if (trap == 0x400) {
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
- } else
- pp |= PPC64_HWNOEXEC;
+ } else
+ pp |= HW_NO_EXEC;
}
return pp;
}
@@ -271,7 +278,7 @@ int hash_page(unsigned long ea, unsigned
int user_region = 0;
int local = 0;
cpumask_t tmp;
-
+
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
diff -puN include/asm-ppc64/pgtable.h~nx-kernel-ppc64 include/asm-ppc64/pgtable.h
--- linux-2.6-bk/include/asm-ppc64/pgtable.h~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h 2005-03-10 13:54:14 -06:00
@@ -117,6 +117,7 @@
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)

#define HW_NO_EXEC _PAGE_EXEC /* This is used when the bit is
* inverted, even though it's the
diff -puN include/asm-ppc64/sections.h~nx-kernel-ppc64 include/asm-ppc64/sections.h
--- linux-2.6-bk/include/asm-ppc64/sections.h~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/sections.h 2005-03-10 13:58:12 -06:00
@@ -17,4 +17,13 @@ extern char _end[];
#define __openfirmware
#define __openfirmwaredata

+
+static inline int in_kernel_text(unsigned long addr)
+{
+ if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
+ return 1;
+
+ return 0;
+}
+
#endif

_

2005-03-10 22:56:00

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2/2] No-exec support for ppc64



> /* Free memory returned from module_alloc */
> diff -puN arch/ppc64/mm/fault.c~nx-kernel-ppc64 arch/ppc64/mm/fault.c
> --- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
> +++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-10 13:54:14 -06:00
> @@ -76,6 +76,13 @@ static int store_updates_sp(struct pt_re
> return 0;
> }
>
> +pte_t *lookup_address(unsigned long address)
> +{
> + pgd_t *pgd = pgd_offset_k(address);
> +
> + return find_linux_pte(pgd, address);
> +}

static please, even inline in this case.

I've removed Andrew from CC upon his request, Paul, Anton or I will
forward to him when it's ready, no need to clobber his mailbox in the
meantime.

Ben.


2005-03-11 14:09:58

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 2/2] No-exec support for ppc64

On Fri, 11 Mar 2005 09:44:28 +1100
Benjamin Herrenschmidt <[email protected]> wrote:

>
>
> > /* Free memory returned from module_alloc */
> > diff -puN arch/ppc64/mm/fault.c~nx-kernel-ppc64 arch/ppc64/mm/fault.c
> > --- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-kernel-ppc64 2005-03-10 13:54:14 -06:00
> > +++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-10 13:54:14 -06:00
> > @@ -76,6 +76,13 @@ static int store_updates_sp(struct pt_re
> > return 0;
> > }
> >
> > +pte_t *lookup_address(unsigned long address)
> > +{
> > + pgd_t *pgd = pgd_offset_k(address);
> > +
> > + return find_linux_pte(pgd, address);
> > +}
>
> static please, even inline in this case.
>
> I've removed Andrew from CC upon his request, Paul, Anton or I will
> forward to him when it's ready, no need to clobber his mailbox in the
> meantime.

3rd time is a charm.

Signed-off-by: Jake Moilanen <[email protected]>

---

linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c | 4 +++
linux-2.6-bk-moilanen/arch/ppc64/kernel/module.c | 3 +-
linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c | 19 ++++++++++++++++
linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c | 19 ++++++++++------
linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h | 1
linux-2.6-bk-moilanen/include/asm-ppc64/sections.h | 9 +++++++
6 files changed, 48 insertions(+), 7 deletions(-)

diff -puN arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 arch/ppc64/kernel/iSeries_setup.c
--- linux-2.6-bk/arch/ppc64/kernel/iSeries_setup.c~nx-kernel-ppc64 2005-03-11 07:50:39 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/iSeries_setup.c 2005-03-11 07:50:39 -06:00
@@ -633,6 +633,10 @@ static void __init iSeries_bolt_kernel(u
unsigned long vpn = va >> PAGE_SHIFT;
unsigned long slot = HvCallHpt_findValid(&hpte, vpn);

+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(ea))
+ mode_rw |= HW_NO_EXEC;
+
if (hpte.dw0.dw0.v) {
/* HPTE exists, so just bolt it */
HvCallHpt_setSwBits(slot, 0x10, 0);
diff -puN arch/ppc64/kernel/module.c~nx-kernel-ppc64 arch/ppc64/kernel/module.c
--- linux-2.6-bk/arch/ppc64/kernel/module.c~nx-kernel-ppc64 2005-03-11 07:50:39 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/kernel/module.c 2005-03-11 07:50:39 -06:00
@@ -102,7 +102,8 @@ void *module_alloc(unsigned long size)
{
if (size == 0)
return NULL;
- return vmalloc(size);
+
+ return vmalloc_exec(size);
}

/* Free memory returned from module_alloc */
diff -puN arch/ppc64/mm/fault.c~nx-kernel-ppc64 arch/ppc64/mm/fault.c
--- linux-2.6-bk/arch/ppc64/mm/fault.c~nx-kernel-ppc64 2005-03-11 07:50:39 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/fault.c 2005-03-11 07:50:57 -06:00
@@ -76,6 +76,13 @@ static int store_updates_sp(struct pt_re
return 0;
}

+static inline pte_t *lookup_address(unsigned long address)
+{
+ pgd_t *pgd = pgd_offset_k(address);
+
+ return find_linux_pte(pgd, address);
+}
+
/*
* The error_code parameter is
* - DSISR for a non-SLB data access fault,
@@ -94,6 +101,7 @@ int do_page_fault(struct pt_regs *regs,
unsigned long is_write = error_code & 0x02000000;
unsigned long trap = TRAP(regs);
unsigned long is_exec = trap == 0x400;
+ pte_t *ptep;

BUG_ON((trap == 0x380) || (trap == 0x480));

@@ -253,6 +261,17 @@ bad_area_nosemaphore:
info.si_addr = (void __user *) address;
force_sig_info(SIGSEGV, &info, current);
return 0;
+ }
+
+ ptep = lookup_address(address);
+
+ if (ptep && pte_present(*ptep) && !pte_exec(*ptep)) {
+ if (printk_ratelimit())
+ printk(KERN_CRIT "kernel tried to execute NX-protected "
+ "page - exploit attempt? (uid: %d)\n",
+ current->uid);
+ show_stack(current, (unsigned long *)__get_SP());
+ do_exit(SIGKILL);
}

return SIGSEGV;
diff -puN arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 arch/ppc64/mm/hash_utils.c
--- linux-2.6-bk/arch/ppc64/mm/hash_utils.c~nx-kernel-ppc64 2005-03-11 07:50:39 -06:00
+++ linux-2.6-bk-moilanen/arch/ppc64/mm/hash_utils.c 2005-03-11 07:59:53 -06:00
@@ -51,6 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/abs_addr.h>
+#include <asm/sections.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -95,6 +96,7 @@ static inline void create_pte_mapping(un
{
unsigned long addr;
unsigned int step;
+ unsigned long tmp_mode;

if (large)
step = 16*MB;
@@ -112,6 +114,13 @@ static inline void create_pte_mapping(un
else
vpn = va >> PAGE_SHIFT;

+
+ tmp_mode = mode;
+
+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(addr))
+ tmp_mode = mode | HW_NO_EXEC;
+
hash = hpt_hash(vpn, large);

hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -120,12 +129,12 @@ static inline void create_pte_mapping(un
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);
else
#endif /* CONFIG_PPC_PSERIES */
ret = native_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);

if (ret == -1) {
ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -238,8 +247,6 @@ unsigned int hash_page_do_lazy_icache(un
{
struct page *page;

-#define PPC64_HWNOEXEC (1 << 2)
-
if (!pfn_valid(pte_pfn(pte)))
return pp;

@@ -250,8 +257,8 @@ unsigned int hash_page_do_lazy_icache(un
if (trap == 0x400) {
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
- } else
- pp |= PPC64_HWNOEXEC;
+ } else
+ pp |= HW_NO_EXEC;
}
return pp;
}
diff -puN include/asm-ppc64/pgtable.h~nx-kernel-ppc64 include/asm-ppc64/pgtable.h
--- linux-2.6-bk/include/asm-ppc64/pgtable.h~nx-kernel-ppc64 2005-03-11 07:50:39 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/pgtable.h 2005-03-11 07:50:39 -06:00
@@ -117,6 +117,7 @@
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)

#define HW_NO_EXEC _PAGE_EXEC /* This is used when the bit is
* inverted, even though it's the
diff -puN include/asm-ppc64/sections.h~nx-kernel-ppc64 include/asm-ppc64/sections.h
--- linux-2.6-bk/include/asm-ppc64/sections.h~nx-kernel-ppc64 2005-03-11 07:50:39 -06:00
+++ linux-2.6-bk-moilanen/include/asm-ppc64/sections.h 2005-03-11 07:50:39 -06:00
@@ -17,4 +17,13 @@ extern char _end[];
#define __openfirmware
#define __openfirmwaredata

+
+static inline int in_kernel_text(unsigned long addr)
+{
+ if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
+ return 1;
+
+ return 0;
+}
+
#endif

_

2005-03-14 10:15:12

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

Jake Moilanen writes:

> diff -puN fs/binfmt_elf.c~nx-user-ppc64 fs/binfmt_elf.c
> --- linux-2.6-bk/fs/binfmt_elf.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
> +++ linux-2.6-bk-moilanen/fs/binfmt_elf.c 2005-03-08 16:08:54 -06:00
> @@ -99,6 +99,8 @@ static int set_brk(unsigned long start,
> up_write(&current->mm->mmap_sem);
> if (BAD_ADDR(addr))
> return addr;
> +
> + sys_mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC);

I don't think I can push that upstream. What happens if you leave
that out?

More generally, we are making a user-visible change, even for programs
that aren't marked as having non-executable stack or heap, because we
are now enforcing that the program can't execute from mappings that
don't have PROT_EXEC. Perhaps we should enforce the requirement for
execute permission only on those programs that indicate somehow that
they can handle it?

Paul.

2005-03-14 22:04:57

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Mon, 14 Mar 2005 21:13:36 +1100
Paul Mackerras <[email protected]> wrote:

> Jake Moilanen writes:
>
> > diff -puN fs/binfmt_elf.c~nx-user-ppc64 fs/binfmt_elf.c
> > --- linux-2.6-bk/fs/binfmt_elf.c~nx-user-ppc64 2005-03-08 16:08:54 -06:00
> > +++ linux-2.6-bk-moilanen/fs/binfmt_elf.c 2005-03-08 16:08:54 -06:00
> > @@ -99,6 +99,8 @@ static int set_brk(unsigned long start,
> > up_write(&current->mm->mmap_sem);
> > if (BAD_ADDR(addr))
> > return addr;
> > +
> > + sys_mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC);
>
> I don't think I can push that upstream. What happens if you leave
> that out?

The bss and the plt are in the same segment, and plt obviously needs to
be executable.

Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .interp PROGBITS 10000154 000154 00000d 00 A 0 0 1
...
...
[26] .plt NOBITS 10013c5c 003c34 000210 00 WAX 0 0 4
[27] .bss NOBITS 10013e6c 003c34 000128 00 WA 0 0 4


Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .note.SuSE .hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn .rela.plt .init .text .fini .rodata
03 .data .eh_frame .got2 .dynamic .ctors .dtors .jcr .got .sdata .sbss .plt .bss
04 .dynamic
05 .note.ABI-tag
06 .note.SuSE
07

Anton mentioned that Alan was considering putting plt into a new segment.

> More generally, we are making a user-visible change, even for programs
> that aren't marked as having non-executable stack or heap, because we
> are now enforcing that the program can't execute from mappings that
> don't have PROT_EXEC. Perhaps we should enforce the requirement for
> execute permission only on those programs that indicate somehow that
> they can handle it?

Unless a program is compiled w/ pt_gnu_stacks we will set the
READ_IMPLIES_EXEC personality and those applications should still
work as normal.

Jake

2005-03-14 22:24:05

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

Jake Moilanen writes:

> > I don't think I can push that upstream. What happens if you leave
> > that out?
>
> The bss and the plt are in the same segment, and plt obviously needs to
> be executable.

Yes... what I was asking was "do things actually break if you leave
that out, or does the binfmt_elf loader honour the 'x' permission on
the PT_LOAD entry for the data/bss region, meaning that it all just
works anyway?"

I did an objdump -p on some random 32-bit binaries, and they all have
"rwx" flags on the data/bss segment (the second PT_LOAD entry). And
when I look in /proc/<pid>/maps, it seems that the heap is in fact
marked executable (this is without your patch). So why do we need the
hack in binfmt_elf.c?

Paul.

2005-03-15 22:02:33

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Tue, 15 Mar 2005 09:18:04 +1100
Paul Mackerras <[email protected]> wrote:

> Jake Moilanen writes:
>
> > > I don't think I can push that upstream. What happens if you leave
> > > that out?
> >
> > The bss and the plt are in the same segment, and plt obviously needs to
> > be executable.
>
> Yes... what I was asking was "do things actually break if you leave
> that out, or does the binfmt_elf loader honour the 'x' permission on
> the PT_LOAD entry for the data/bss region, meaning that it all just
> works anyway?"

It does not work w/o the sys_mprotect. It will hang in one of the first
few binaries.

I believe the problem is that the last PT_LOAD entry does not have the
correct size, and we only mmap up to the sbss. The .sbss, .plt, and
.bss do not get mmapped with the section.

Here is /bin/bash on SLES 9:

Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .interp PROGBITS 10000174 000174 00000d 00 A 0 0 1
...
...
[19] .data PROGBITS 1008ca80 07ca80 001b34 00 WA 0 0 4
[20] .eh_frame PROGBITS 1008e5b4 07e5b4 0000b4 00 A 0 0 4
[21] .got2 PROGBITS 1008e668 07e668 000010 00 WA 0 0 1
[22] .dynamic DYNAMIC 1008e678 07e678 0000e8 08 WA 6 0 4
[23] .ctors PROGBITS 1008e760 07e760 000008 00 WA 0 0 4
[24] .dtors PROGBITS 1008e768 07e768 000008 00 WA 0 0 4
[25] .jcr PROGBITS 1008e770 07e770 000004 00 WA 0 0 4
[26] .got PROGBITS 1008e774 07e774 000014 04 WAX 0 0 4
[27] .sdata PROGBITS 1008e788 07e788 0000d4 00 WA 0 0 4
[28] .sbss NOBITS 1008e860 07e860 000704 00 WA 0 0 8
[29] .plt NOBITS 1008ef64 07e860 000aa4 00 WAX 0 0 4
[30] .bss NOBITS 1008fa10 07e868 0062f0 00 WA 0 0 16


Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000034 0x10000034 0x10000034 0x00120 0x00120 R E 0x4
INTERP 0x000174 0x10000174 0x10000174 0x0000d 0x0000d R 0x1
[Requesting program interpreter: /lib/ld.so.1]
LOAD 0x000000 0x10000000 0x10000000 0x7ca80 0x7ca80 R E 0x10000
LOAD 0x07ca80 0x1008ca80 0x1008ca80 0x01ddc 0x09280 RWE 0x10000
DYNAMIC 0x07e678 0x1008e678 0x1008e678 0x000e8 0x000e8 RW 0x4
NOTE 0x000184 0x10000184 0x10000184 0x00020 0x00020 R 0x4
NOTE 0x0001a4 0x100001a4 0x100001a4 0x00018 0x00018 R 0x4
GNU_EH_FRAME 0x07ca54 0x1007ca54 0x1007ca54 0x0002c 0x0002c R 0x4
STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4


Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .note.SuSE .hash .dynsym .dynstr .gnu.version .g
nu.version_r .rela.dyn .rela.plt .init .text text.unlikely text.hot .fini .rodat
a .eh_frame_hdr
03 .data .eh_frame .got2 .dynamic .ctors .dtors .jcr .got .sdata .sbss .p
lt .bss
04 .dynamic
05 .note.ABI-tag
06 .note.SuSE
07 .eh_frame_hdr
08

In the program headers section, the FileSiz for the last PT_LOAD is
0x1ddc. If we go back to the Section Headers and look at .data it is at
0x1008ca80. So the segment should end at 0x1008e85c. We round up for
alignment and we get 0x1008e860 or .sbss. The sbss, plt, and bss are
not mmapped. So the sys_mprotect is used to pick it up.

Did I miss something to explain this? Can you think of another way to
fix it?

Jake


2005-03-15 22:52:42

by Alan Modra

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Tue, Mar 15, 2005 at 03:51:35PM -0600, Jake Moilanen wrote:
> I believe the problem is that the last PT_LOAD entry does not have the
> correct size, and we only mmap up to the sbss. The .sbss, .plt, and
> .bss do not get mmapped with the section.

Huh? .sbss, .plt and .bss have no file contents, so of course p_filesz
doesn't cover them.

--
Alan Modra
IBM OzLabs - Linux Technology Centre

2005-03-15 23:26:37

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Wed, 16 Mar 2005 09:18:36 +1030
Alan Modra <[email protected]> wrote:

> On Tue, Mar 15, 2005 at 03:51:35PM -0600, Jake Moilanen wrote:
> > I believe the problem is that the last PT_LOAD entry does not have the
> > correct size, and we only mmap up to the sbss. The .sbss, .plt, and
> > .bss do not get mmapped with the section.
>
> Huh? .sbss, .plt and .bss have no file contents, so of course p_filesz
> doesn't cover them.

You're right, those shouldn't be mmapped.

set_brk() call is called on sbss, plt and bss. There needs to be some
method to set execute permission, on those pieces as well. Currently it
has no concept of what permission should be set.

Jake

2005-03-16 06:09:59

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

Jake Moilanen writes:

> It does not work w/o the sys_mprotect. It will hang in one of the first
> few binaries.

Hmmm, what distro is this with? I just tried a kernel with the patch
below on a SLES9 install and a Debian install and it came up and ran
just fine in both cases.

Paul.

diff -urN linux-2.5/arch/ppc64/kernel/head.S test/arch/ppc64/kernel/head.S
--- linux-2.5/arch/ppc64/kernel/head.S 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/kernel/head.S 2005-03-15 17:14:44.000000000 +1100
@@ -950,11 +950,12 @@
* accessing a userspace segment (even from the kernel). We assume
* kernel addresses always have the high bit set.
*/
- rlwinm r4,r4,32-23,29,29 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
orc r0,r12,r0 /* MSR_PR | ~high_bit */
rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
ori r4,r4,1 /* add _PAGE_PRESENT */
+ rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */

/*
* On iSeries, we soft-disable interrupts here, then
diff -urN linux-2.5/arch/ppc64/kernel/iSeries_htab.c test/arch/ppc64/kernel/iSeries_htab.c
--- linux-2.5/arch/ppc64/kernel/iSeries_htab.c 2004-09-21 17:22:33.000000000 +1000
+++ test/arch/ppc64/kernel/iSeries_htab.c 2005-03-15 17:15:36.000000000 +1100
@@ -144,6 +144,10 @@

HvCallHpt_get(&hpte, slot);
if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+ /*
+ * Hypervisor expects bits as NPPP, which is
+ * different from how they are mapped in our PP.
+ */
HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
iSeries_hunlock(slot);
return 0;
diff -urN linux-2.5/arch/ppc64/kernel/iSeries_setup.c test/arch/ppc64/kernel/iSeries_setup.c
--- linux-2.5/arch/ppc64/kernel/iSeries_setup.c 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/kernel/iSeries_setup.c 2005-03-15 16:55:05.000000000 +1100
@@ -633,6 +633,10 @@
unsigned long vpn = va >> PAGE_SHIFT;
unsigned long slot = HvCallHpt_findValid(&hpte, vpn);

+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(ea))
+ mode_rw |= HW_NO_EXEC;
+
if (hpte.dw0.dw0.v) {
/* HPTE exists, so just bolt it */
HvCallHpt_setSwBits(slot, 0x10, 0);
diff -urN linux-2.5/arch/ppc64/kernel/module.c test/arch/ppc64/kernel/module.c
--- linux-2.5/arch/ppc64/kernel/module.c 2004-05-10 21:25:58.000000000 +1000
+++ test/arch/ppc64/kernel/module.c 2005-03-15 16:55:05.000000000 +1100
@@ -102,7 +102,8 @@
{
if (size == 0)
return NULL;
- return vmalloc(size);
+
+ return vmalloc_exec(size);
}

/* Free memory returned from module_alloc */
diff -urN linux-2.5/arch/ppc64/kernel/pSeries_lpar.c test/arch/ppc64/kernel/pSeries_lpar.c
--- linux-2.5/arch/ppc64/kernel/pSeries_lpar.c 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/kernel/pSeries_lpar.c 2005-03-15 16:55:02.000000000 +1100
@@ -470,7 +470,7 @@
slot = pSeries_lpar_hpte_find(vpn);
BUG_ON(slot == -1);

- flags = newpp & 3;
+ flags = newpp & 7;
lpar_rc = plpar_pte_protect(flags, slot, 0);

BUG_ON(lpar_rc != H_Success);
diff -urN linux-2.5/arch/ppc64/mm/fault.c test/arch/ppc64/mm/fault.c
--- linux-2.5/arch/ppc64/mm/fault.c 2005-01-04 10:49:20.000000000 +1100
+++ test/arch/ppc64/mm/fault.c 2005-03-15 17:13:05.000000000 +1100
@@ -91,8 +91,9 @@
struct mm_struct *mm = current->mm;
siginfo_t info;
unsigned long code = SEGV_MAPERR;
- unsigned long is_write = error_code & 0x02000000;
+ unsigned long is_write = error_code & DSISR_ISSTORE;
unsigned long trap = TRAP(regs);
+ unsigned long is_exec = trap == 0x400;

BUG_ON((trap == 0x380) || (trap == 0x480));

@@ -109,7 +110,7 @@
if (!user_mode(regs) && (address >= TASK_SIZE))
return SIGSEGV;

- if (error_code & 0x00400000) {
+ if (error_code & DSISR_DABRMATCH) {
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return 0;
@@ -199,16 +200,19 @@
good_area:
code = SEGV_ACCERR;

+ if (is_exec) {
+ /* protection fault */
+ if (error_code & DSISR_PROTFAULT)
+ goto bad_area;
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
/* a write */
- if (is_write) {
+ } else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ if (!(vma->vm_flags & VM_READ))
goto bad_area;
}

@@ -251,6 +255,12 @@
return 0;
}

+ if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
+ && printk_ratelimit())
+ printk(KERN_CRIT "kernel tried to execute NX-protected"
+ " page (%lx) - exploit attempt? (uid: %d)\n",
+ address, current->uid);
+
return SIGSEGV;

/*
diff -urN linux-2.5/arch/ppc64/mm/hash_low.S test/arch/ppc64/mm/hash_low.S
--- linux-2.5/arch/ppc64/mm/hash_low.S 2005-01-05 13:48:02.000000000 +1100
+++ test/arch/ppc64/mm/hash_low.S 2005-03-15 16:55:02.000000000 +1100
@@ -89,7 +89,7 @@
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
- rlwinm r30,r4,5,24,24 /* _PAGE_RW -> _PAGE_DIRTY */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
/* Write the linux PTE atomically (setting busy) */
@@ -112,11 +112,11 @@
rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
xor r28,r5,r0
-
- /* Convert linux PTE bits into HW equivalents
- */
- andi. r3,r30,0x1fa /* Get basic set of flags */
- rlwinm r0,r30,32-2+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
andc r0,r30,r0 /* r0 = pte & ~r0 */
diff -urN linux-2.5/arch/ppc64/mm/hash_utils.c test/arch/ppc64/mm/hash_utils.c
--- linux-2.5/arch/ppc64/mm/hash_utils.c 2005-03-07 10:46:38.000000000 +1100
+++ test/arch/ppc64/mm/hash_utils.c 2005-03-15 17:20:35.000000000 +1100
@@ -51,6 +51,7 @@
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/abs_addr.h>
+#include <asm/sections.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -95,6 +96,7 @@
{
unsigned long addr;
unsigned int step;
+ unsigned long tmp_mode;

if (large)
step = 16*MB;
@@ -112,6 +114,13 @@
else
vpn = va >> PAGE_SHIFT;

+
+ tmp_mode = mode;
+
+ /* Make non-kernel text non-executable */
+ if (!in_kernel_text(addr))
+ tmp_mode = mode | HW_NO_EXEC;
+
hash = hpt_hash(vpn, large);

hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -120,12 +129,12 @@
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);
else
#endif /* CONFIG_PPC_PSERIES */
ret = native_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, mode, 1, large);
+ 0, tmp_mode, 1, large);

if (ret == -1) {
ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -238,8 +247,6 @@
{
struct page *page;

-#define PPC64_HWNOEXEC (1 << 2)
-
if (!pfn_valid(pte_pfn(pte)))
return pp;

@@ -251,7 +258,7 @@
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= PPC64_HWNOEXEC;
+ pp |= HW_NO_EXEC;
}
return pp;
}
diff -urN linux-2.5/arch/ppc64/mm/hugetlbpage.c test/arch/ppc64/mm/hugetlbpage.c
--- linux-2.5/arch/ppc64/mm/hugetlbpage.c 2005-03-07 14:01:43.000000000 +1100
+++ test/arch/ppc64/mm/hugetlbpage.c 2005-03-15 17:27:33.000000000 +1100
@@ -782,7 +782,6 @@
{
pte_t *ptep;
unsigned long va, vpn;
- int is_write;
pte_t old_pte, new_pte;
unsigned long hpteflags, prpn;
long slot;
@@ -809,8 +808,7 @@
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
- is_write = access & _PAGE_RW;
- if (unlikely(is_write && !(pte_val(*ptep) & _PAGE_RW)))
+ if (unlikely(access & ~pte_val(*ptep)))
goto out;
/*
* At this point, we have a pte (old_pte) which can be used to build
@@ -829,6 +827,8 @@
new_pte = old_pte;

hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
+ hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);

/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
diff -urN linux-2.5/include/asm-ppc64/elf.h test/include/asm-ppc64/elf.h
--- linux-2.5/include/asm-ppc64/elf.h 2005-03-07 10:46:39.000000000 +1100
+++ test/include/asm-ppc64/elf.h 2005-03-15 16:55:02.000000000 +1100
@@ -226,6 +226,13 @@
else if (current->personality != PER_LINUX32) \
set_personality(PER_LINUX); \
} while (0)
+
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ */
+#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
+
#endif

/*
diff -urN linux-2.5/include/asm-ppc64/page.h test/include/asm-ppc64/page.h
--- linux-2.5/include/asm-ppc64/page.h 2005-03-07 10:46:39.000000000 +1100
+++ test/include/asm-ppc64/page.h 2005-03-15 16:55:02.000000000 +1100
@@ -235,8 +235,25 @@

#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

+#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+#define VM_STACK_DEFAULT_FLAGS \
+ (test_thread_flag(TIF_32BIT) ? \
+ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
+
#endif /* __KERNEL__ */
#endif /* _PPC64_PAGE_H */
diff -urN linux-2.5/include/asm-ppc64/pgtable.h test/include/asm-ppc64/pgtable.h
--- linux-2.5/include/asm-ppc64/pgtable.h 2005-03-07 14:01:44.000000000 +1100
+++ test/include/asm-ppc64/pgtable.h 2005-03-15 17:41:14.000000000 +1100
@@ -82,14 +82,14 @@
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
-#define _PAGE_RW 0x0004 /* software: user write access allowed */
+#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_EXEC 0x0200 /* software: i-cache coherence required */
+#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
@@ -118,29 +118,38 @@
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
_PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)

/*
- * The PowerPC can only do execute protection on a segment (256MB) basis,
- * not on a page basis. So we consider execute permission the same as read.
+ * This bit in a hardware PTE indicates that the page is *not* executable.
+ */
+#define HW_NO_EXEC _PAGE_EXEC
+
+/*
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
* Also, write permissions imply read permissions.
* This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_X
+#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY_X
#define __P100 PAGE_READONLY
#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY
+#define __P110 PAGE_COPY_X
#define __P111 PAGE_COPY_X

#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_X
+#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED_X
-#define __S100 PAGE_READONLY
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED
+#define __S110 PAGE_SHARED_X
#define __S111 PAGE_SHARED_X

#ifndef __ASSEMBLY__
@@ -438,7 +447,7 @@
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
{
unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long old, tmp;

__asm__ __volatile__(
diff -urN linux-2.5/include/asm-ppc64/processor.h test/include/asm-ppc64/processor.h
--- linux-2.5/include/asm-ppc64/processor.h 2005-03-07 10:46:39.000000000 +1100
+++ test/include/asm-ppc64/processor.h 2005-03-15 17:08:21.000000000 +1100
@@ -173,6 +173,11 @@
#define SPRN_DEC 0x016 /* Decrement Register */
#define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */
#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
+#define DSISR_NOHPTE 0x40000000 /* no translation found */
+#define DSISR_PROTFAULT 0x08000000 /* protection fault */
+#define DSISR_ISSTORE 0x02000000 /* access was a store */
+#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
+#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */
#define SPRN_EAR 0x11A /* External Address Register */
#define SPRN_ESR 0x3D4 /* Exception Syndrome Register */
#define ESR_IMCP 0x80000000 /* Instr. Machine Check - Protection */
diff -urN linux-2.5/include/asm-ppc64/sections.h test/include/asm-ppc64/sections.h
--- linux-2.5/include/asm-ppc64/sections.h 2004-02-12 14:57:14.000000000 +1100
+++ test/include/asm-ppc64/sections.h 2005-03-15 16:55:05.000000000 +1100
@@ -17,4 +17,13 @@
#define __openfirmware
#define __openfirmwaredata

+
+static inline int in_kernel_text(unsigned long addr)
+{
+ if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
+ return 1;
+
+ return 0;
+}
+
#endif

2005-03-16 21:59:24

by Jake Moilanen

[permalink] [raw]
Subject: Re: [PATCH 1/2] No-exec support for ppc64

On Wed, 16 Mar 2005 17:10:57 +1100
Paul Mackerras <[email protected]> wrote:

> Jake Moilanen writes:
>
> > It does not work w/o the sys_mprotect. It will hang in one of the first
> > few binaries.
>
> Hmmm, what distro is this with? I just tried a kernel with the patch
> below on a SLES9 install and a Debian install and it came up and ran
> just fine in both cases.

I'm not sure that the patch you sent is actually doing protection
correctly.

To test I commented out this line:

> +#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))

and then ran a non-pt_gnu_stack binary which should have executed on a non-exec
segment, it did not segfault.

> + *
> + * Note due to the way vm flags are laid out, the bits are XWR
> */
> #define __P000 PAGE_NONE
> -#define __P001 PAGE_READONLY_X
> +#define __P001 PAGE_READONLY
> #define __P010 PAGE_COPY
> #define __P011 PAGE_COPY_X
> #define __P100 PAGE_READONLY
> #define __P101 PAGE_READONLY_X
> -#define __P110 PAGE_COPY
> +#define __P110 PAGE_COPY_X
> #define __P111 PAGE_COPY_X


I think the problem was this hunk. __P011 should be PAGE_COPY and
__P100 should be PAGE_READONLY_X.

Here is a patch ontop of the last patch you sent to fix this problem and
take another crack at doing the sys_mprotect less hackish.

Signed-off-by: Jake Moilanen <[email protected]>

---

linux-2.6.11.4-paulus-moilanen/fs/binfmt_elf.c | 18 +++++++++----
linux-2.6.11.4-paulus-moilanen/include/asm-ppc64/pgtable.h | 4 +-
2 files changed, 15 insertions(+), 7 deletions(-)

diff -puN fs/binfmt_elf.c~more-nx fs/binfmt_elf.c
--- linux-2.6.11.4-paulus/fs/binfmt_elf.c~more-nx 2005-03-16 09:35:28 -06:00
+++ linux-2.6.11.4-paulus-moilanen/fs/binfmt_elf.c 2005-03-16 11:03:15 -06:00
@@ -88,7 +88,7 @@ static struct linux_binfmt elf_format =

#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)

-static int set_brk(unsigned long start, unsigned long end)
+static int set_brk(unsigned long start, unsigned long end, int prot)
{
start = ELF_PAGEALIGN(start);
end = ELF_PAGEALIGN(end);
@@ -99,6 +99,9 @@ static int set_brk(unsigned long start,
up_write(&current->mm->mmap_sem);
if (BAD_ADDR(addr))
return addr;
+
+ sys_mprotect(start, end-start, prot);
+
}
current->mm->start_brk = current->mm->brk = end;
return 0;
@@ -529,6 +532,7 @@ static int load_elf_binary(struct linux_
struct files_struct *files;
int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
unsigned long def_flags = 0;
+ int bss_prot = 0;
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
@@ -811,7 +815,7 @@ static int load_elf_binary(struct linux_
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk (elf_bss + load_bias,
- elf_brk + load_bias);
+ elf_brk + load_bias, bss_prot);
if (retval) {
send_sig(SIGKILL, current, 0);
goto out_free_dentry;
@@ -883,15 +887,19 @@ static int load_elf_binary(struct linux_

k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

- if (k > elf_bss)
+ if (k > elf_bss) {
elf_bss = k;
+ bss_prot = elf_prot;
+ }
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
- if (k > elf_brk)
+ if (k > elf_brk) {
elf_brk = k;
+ bss_prot = elf_prot;
+ }
}

loc->elf_ex.e_entry += load_bias;
@@ -907,7 +915,7 @@ static int load_elf_binary(struct linux_
* mapping in the interpreter, to make sure it doesn't wind
* up getting placed where the bss needs to go.
*/
- retval = set_brk(elf_bss, elf_brk);
+ retval = set_brk(elf_bss, elf_brk, bss_prot);
if (retval) {
send_sig(SIGKILL, current, 0);
goto out_free_dentry;
diff -puN include/asm-ppc64/pgtable.h~more-nx include/asm-ppc64/pgtable.h
--- linux-2.6.11.4-paulus/include/asm-ppc64/pgtable.h~more-nx 2005-03-16 09:35:44 -06:00
+++ linux-2.6.11.4-paulus-moilanen/include/asm-ppc64/pgtable.h 2005-03-16 09:35:53 -06:00
@@ -137,8 +137,8 @@
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY_X
-#define __P100 PAGE_READONLY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
#define __P101 PAGE_READONLY_X
#define __P110 PAGE_COPY_X
#define __P111 PAGE_COPY_X

_