2020-07-24 09:26:08

by Michael Ellerman

[permalink] [raw]
Subject: [PATCH v2 1/5] selftests/powerpc: Add test of stack expansion logic

We have custom stack expansion checks that it turns out are extremely
badly tested and contain bugs, surprise. So add some tests that
exercise the code and capture the current boundary conditions.

The signal test currently fails on 64-bit kernels because the 2048
byte allowance for the signal frame is too small, we will fix that in
a subsequent patch.

Signed-off-by: Michael Ellerman <[email protected]>
---

v2:
- Concentrate on used stack around the 1MB size, as that's where our
custom logic kicks in.
- Increment the used stack size by 64 so we can exercise the case
where we overflow the page by less than 128 (__SIGNAL_FRAMESIZE).

---
tools/testing/selftests/powerpc/mm/.gitignore | 2 +
tools/testing/selftests/powerpc/mm/Makefile | 9 +-
.../powerpc/mm/stack_expansion_ldst.c | 233 ++++++++++++++++++
.../powerpc/mm/stack_expansion_signal.c | 118 +++++++++
tools/testing/selftests/powerpc/pmu/lib.h | 1 +
5 files changed, 362 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
create mode 100644 tools/testing/selftests/powerpc/mm/stack_expansion_signal.c

diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 8d041f508a51..52308f42b7de 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -8,3 +8,5 @@ large_vm_fork_separation
bad_accesses
tlbie_test
pkey_exec_prot
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 5a86d59441dc..6cd772e0e374 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,7 +3,9 @@
$(MAKE) -C ../

TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \
- large_vm_fork_separation bad_accesses pkey_exec_prot
+ large_vm_fork_separation bad_accesses pkey_exec_prot stack_expansion_signal \
+ stack_expansion_ldst
+
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile

@@ -17,6 +19,11 @@ $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64

+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
+
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1

diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 000000000000..0587e11437f5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+ LOAD,
+ STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at offset @delta from either the base of the stack or the
+ * current stack pointer.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+ unsigned long target;
+ char stack_cur;
+
+ if ((unsigned long)&stack_cur > target_sp)
+ return consume_stack(target_sp, stack_high, delta, type);
+ else {
+ // We don't really need this, but without it GCC might not
+ // generate a recursive call above.
+ stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+ asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+ asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+
+ // Kludge, delta < 0 indicates relative to SP
+ if (delta < 0)
+ target = stack_top_sp + delta;
+ else
+ target = stack_high - delta + 1;
+
+ volatile char *p = (char *)target;
+
+ if (type == STORE)
+ *p = c;
+ else
+ c = *p;
+
+ // Do something to prevent the stack frame being popped prior to
+ // our access above.
+ getpid();
+ }
+
+ return 0;
+}
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+ unsigned long start, end;
+ static char buf[4096];
+ char name[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, name);
+ if (rc == 2)
+ continue;
+
+ if (rc != 3) {
+ printf("sscanf errored\n");
+ rc = -1;
+ break;
+ }
+
+ if (strstr(name, needle)) {
+ *low = start;
+ *high = end - 1;
+ rc = 0;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+ unsigned long low, stack_high;
+
+ assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+ assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+ printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+ type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+ stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+ return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+ pid_t pid;
+ int rc;
+
+ pid = fork();
+ if (pid == 0)
+ exit(child(stack_used, delta, type));
+
+ assert(waitpid(pid, &rc, 0) != -1);
+
+ if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+ return 0;
+
+ // We don't expect a non-zero exit that's not a signal
+ assert(!WIFEXITED(rc));
+
+ printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n",
+ type == LOAD ? "load" : "store", delta, stack_used,
+ WTERMSIG(rc));
+
+ return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE (32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+ assert(test_one(DEFAULT_SIZE, 512 * _KB, type) == 0);
+
+ // powerpc has a special case to allow up to 1MB
+ assert(test_one(DEFAULT_SIZE, 1 * _MB, type) == 0);
+
+#ifdef __powerpc__
+ // This fails on powerpc because it's > 1MB and is not a stdu &
+ // not close to r1
+ assert(test_one(DEFAULT_SIZE, 1 * _MB + 8, type) != 0);
+#else
+ assert(test_one(DEFAULT_SIZE, 1 * _MB + 8, type) == 0);
+#endif
+
+#ifdef __powerpc__
+ // Accessing way past the stack pointer is not allowed on powerpc
+ assert(test_one(DEFAULT_SIZE, rlim_cur, type) != 0);
+#else
+ // We should be able to access anywhere within the rlimit
+ assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+#endif
+
+ // But if we go past the rlimit it should fail
+ assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+
+ // Above 1MB powerpc only allows accesses within 2048 bytes of
+ // r1 for accesses that aren't stdu
+ assert(test_one(1 * _MB + page_size - 128, -2048, type) == 0);
+#ifdef __powerpc__
+ assert(test_one(1 * _MB + page_size - 128, -2049, type) != 0);
+#else
+ assert(test_one(1 * _MB + page_size - 128, -2049, type) == 0);
+#endif
+
+ // By consuming 2MB of stack we test the stdu case
+ assert(test_one(2 * _MB + page_size - 128, -2048, type) == 0);
+}
+
+static int test(void)
+{
+ unsigned long page_size;
+ struct rlimit rlimit;
+
+ page_size = getpagesize();
+ getrlimit(RLIMIT_STACK, &rlimit);
+ printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur);
+
+ printf("Testing loads ...\n");
+ test_one_type(LOAD, page_size, rlimit.rlim_cur);
+ printf("Testing stores ...\n");
+ test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+ printf("All OK\n");
+
+ return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+ return test();
+}
+#endif
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 000000000000..c8b32a29e274
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+static void sigusr1_handler(int signal_arg)
+{
+ sig_occurred = 1;
+}
+
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+ char stack_cur;
+
+ if ((stack_base_ptr - &stack_cur) < stack_size)
+ return consume_stack(stack_size, write_pipe);
+ else {
+ stack_top_ptr = &stack_cur;
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (!sig_occurred)
+ barrier();
+ }
+
+ return 0;
+}
+
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+ struct sigaction act;
+ char stack_base;
+
+ act.sa_handler = sigusr1_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ if (sigaction(SIGUSR1, &act, NULL) < 0)
+ err(1, "sigaction");
+
+ stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+ FAIL_IF(consume_stack(stack_size, write_pipe));
+
+ printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+ stack_size, stack_base_ptr, stack_top_ptr,
+ stack_base_ptr - stack_top_ptr);
+
+ return 0;
+}
+
+static int test_one_size(unsigned int stack_size)
+{
+ union pipe read_pipe, write_pipe;
+ pid_t pid;
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+ exit(child(stack_size, read_pipe));
+ }
+
+ close(read_pipe.write_fd);
+ close(write_pipe.read_fd);
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ kill(pid, SIGUSR1);
+
+ FAIL_IF(wait_for_child(pid));
+
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+
+ return 0;
+}
+
+int test(void)
+{
+ unsigned int i, size;
+
+ // Test with used stack from 1MB - 64K to 1MB + 64K
+ // Increment by 64 to get more coverage of odd sizes
+ for (i = 0; i < (128 * _KB); i += 64) {
+ size = i + (1 * _MB) - (64 * _KB);
+ FAIL_IF(test_one_size(size));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d0b4d3..bf1bec013bbb 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
#define __SELFTESTS_POWERPC_PMU_LIB_H

+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
--
2.25.1


2020-07-24 09:26:26

by Michael Ellerman

[permalink] [raw]
Subject: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame

We have powerpc specific logic in our page fault handling to decide if
an access to an unmapped address below the stack pointer should expand
the stack VMA.

The code was originally added in 2004 "ported from 2.4". The rough
logic is that the stack is allowed to grow to 1MB with no extra
checking. Over 1MB the access must be within 2048 bytes of the stack
pointer, or be from a user instruction that updates the stack pointer.

The 2048 byte allowance below the stack pointer is there to cover the
288 byte "red zone" as well as the "about 1.5kB" needed by the signal
delivery code.

Unfortunately since then the signal frame has expanded, and is now
4224 bytes on 64-bit kernels with transactional memory enabled. This
means if a process has consumed more than 1MB of stack, and its stack
pointer lies less than 4224 bytes from the next page boundary, signal
delivery will fault when trying to expand the stack and the process
will see a SEGV.

The total size of the signal frame is the size of struct rt_sigframe
(which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
64-bit).

The 2048 byte allowance was correct until 2008 as the signal frame
was:

struct rt_sigframe {
struct ucontext uc; /* 0 1440 */
/* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1440 16 */
unsigned int tramp[6]; /* 1456 24 */
struct siginfo * pinfo; /* 1480 8 */
void * puc; /* 1488 8 */
struct siginfo info; /* 1496 128 */
/* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1624 288 */

/* size: 1920, cachelines: 15, members: 7 */
/* padding: 8 */
};

1920 + 128 = 2048

Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
ptrace and signal support") (Jul 2008) the signal frame expanded to
2304 bytes:

struct rt_sigframe {
struct ucontext uc; /* 0 1696 */ <--
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
long unsigned int _unused[2]; /* 1696 16 */
unsigned int tramp[6]; /* 1712 24 */
struct siginfo * pinfo; /* 1736 8 */
void * puc; /* 1744 8 */
struct siginfo info; /* 1752 128 */
/* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
char abigap[288]; /* 1880 288 */

/* size: 2176, cachelines: 17, members: 7 */
/* padding: 8 */
};

2176 + 128 = 2304

At this point we should have been exposed to the bug, though as far as
I know it was never reported. I no longer have a system old enough to
easily test on.

Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
grow-down stack segment") caused our stack expansion code to never
trigger, as there was always a VMA found for a write up to PAGE_SIZE
below r1.

That meant the bug was hidden as we continued to expand the signal
frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
state to the signal context") (Feb 2013):

struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */ <--
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[288]; /* 3576 288 */

/* size: 3872, cachelines: 31, members: 8 */
/* padding: 8 */
/* last cacheline: 32 bytes */
};

3872 + 128 = 4000

And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
userspace to 512 bytes") (Feb 2014):

struct rt_sigframe {
struct ucontext uc; /* 0 1696 */
/* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
struct ucontext uc_transact; /* 1696 1696 */
/* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
long unsigned int _unused[2]; /* 3392 16 */
unsigned int tramp[6]; /* 3408 24 */
struct siginfo * pinfo; /* 3432 8 */
void * puc; /* 3440 8 */
struct siginfo info; /* 3448 128 */
/* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
char abigap[512]; /* 3576 512 */ <--

/* size: 4096, cachelines: 32, members: 8 */
/* padding: 8 */
};

4096 + 128 = 4224

Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
gap, between vmas") exposed us to the existing bug, because it changed
the stack VMA to be the correct/real size, meaning our stack expansion
code is now triggered.

Fix it by increasing the allowance to 4224 bytes.

Hard-coding 4224 is obviously unsafe against future expansions of the
signal frame in the same way as the existing code. We can't easily use
sizeof() because the signal frame structure is not in a header. We
will either fix that, or rip out all the custom stack expansion
checking logic entirely.

Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
Cc: [email protected] # v2.6.27+
Reported-by: Tom Lane <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
---

v2: Account for the extra 128 bytes of __SIGNAL_FRAMESIZE, making the
total size 4224, as noticed by dja.

See also https://bugzilla.kernel.org/show_bug.cgi?id=205183
---
arch/powerpc/mm/fault.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 641fc5f3d7dd..3ebb1792e636 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -267,6 +267,9 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
return false;
}

+// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
+#define SIGFRAME_MAX_SIZE (4096 + 128)
+
static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
struct vm_area_struct *vma, unsigned int flags,
bool *must_retry)
@@ -274,7 +277,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
/*
* N.B. The POWER/Open ABI allows programs to access up to
* 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
+ * The kernel signal delivery code writes a bit over 4KB
* below the stack pointer (r1) before decrementing it.
* The exec code can write slightly over 640kB to the stack
* before setting the user r1. Thus we allow the stack to
@@ -299,7 +302,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
* between the last mapped region and the stack will
* expand the stack rather than segfaulting.
*/
- if (address + 2048 >= uregs->gpr[1])
+ if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
return false;

if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
--
2.25.1

2020-07-24 09:28:03

by Michael Ellerman

[permalink] [raw]
Subject: [PATCH v2 5/5] selftests/powerpc: Remove powerpc special cases from stack expansion test

Now that the powerpc code behaves the same as other architectures we
can drop the special cases we had.

Signed-off-by: Michael Ellerman <[email protected]>
---
.../powerpc/mm/stack_expansion_ldst.c | 41 +++----------------
1 file changed, 5 insertions(+), 36 deletions(-)

v2: no change just rebased.

diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
index 8dbfb51acf0f..ed9143990888 100644
--- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -56,13 +56,7 @@ int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta,
#else
asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
#endif
-
- // Kludge, delta < 0 indicates relative to SP
- if (delta < 0)
- target = stack_top_sp + delta;
- else
- target = stack_high - delta + 1;
-
+ target = stack_high - delta + 1;
volatile char *p = (char *)target;

if (type == STORE)
@@ -162,41 +156,16 @@ static int test_one(unsigned int stack_used, int delta, enum access_type type)

static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
{
- assert(test_one(DEFAULT_SIZE, 512 * _KB, type) == 0);
+ unsigned long delta;

- // powerpc has a special case to allow up to 1MB
- assert(test_one(DEFAULT_SIZE, 1 * _MB, type) == 0);
-
-#ifdef __powerpc__
- // This fails on powerpc because it's > 1MB and is not a stdu &
- // not close to r1
- assert(test_one(DEFAULT_SIZE, 1 * _MB + 8, type) != 0);
-#else
- assert(test_one(DEFAULT_SIZE, 1 * _MB + 8, type) == 0);
-#endif
-
-#ifdef __powerpc__
- // Accessing way past the stack pointer is not allowed on powerpc
- assert(test_one(DEFAULT_SIZE, rlim_cur, type) != 0);
-#else
// We should be able to access anywhere within the rlimit
+ for (delta = page_size; delta <= rlim_cur; delta += page_size)
+ assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
-#endif

// But if we go past the rlimit it should fail
assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
-
- // Above 1MB powerpc only allows accesses within 4224 bytes of
- // r1 for accesses that aren't stdu
- assert(test_one(1 * _MB + page_size - 128, -4224, type) == 0);
-#ifdef __powerpc__
- assert(test_one(1 * _MB + page_size - 128, -4225, type) != 0);
-#else
- assert(test_one(1 * _MB + page_size - 128, -4225, type) == 0);
-#endif
-
- // By consuming 2MB of stack we test the stdu case
- assert(test_one(2 * _MB + page_size - 128, -4224, type) == 0);
}

static int test(void)
--
2.25.1

2020-07-24 09:28:18

by Michael Ellerman

[permalink] [raw]
Subject: [PATCH v2 4/5] powerpc/mm: Remove custom stack expansion checking

We have powerpc specific logic in our page fault handling to decide if
an access to an unmapped address below the stack pointer should expand
the stack VMA.

The logic aims to prevent userspace from doing bad accesses below the
stack pointer. However as long as the stack is < 1MB in size, we allow
all accesses without further checks. Adding some debug I see that I
can do a full kernel build and LTP run, and not a single process has
used more than 1MB of stack. So for the majority of processes the
logic never even fires.

We also recently found a nasty bug in this code which could cause
userspace programs to be killed during signal delivery. It went
unnoticed presumably because most processes use < 1MB of stack.

The generic mm code has also grown support for stack guard pages since
this code was originally written, so the most heinous case of the
stack expanding into other mappings is now handled for us.

Finally although some other arches have special logic in this path,
from what I can tell none of x86, arm64, arm and s390 impose any extra
checks other than those in expand_stack().

So drop our complicated logic and like other architectures just let
the stack expand as long as its within the rlimit.

Signed-off-by: Michael Ellerman <[email protected]>
---
arch/powerpc/mm/fault.c | 109 ++--------------------------------------
1 file changed, 5 insertions(+), 104 deletions(-)

v2: no change just rebased.

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3ebb1792e636..925a7231abb3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,39 +42,7 @@
#include <asm/kup.h>
#include <asm/inst.h>

-/*
- * Check whether the instruction inst is a store using
- * an update addressing form which will update r1.
- */
-static bool store_updates_sp(struct ppc_inst inst)
-{
- /* check for 1 in the rA field */
- if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1)
- return false;
- /* check major opcode */
- switch (ppc_inst_primary_opcode(inst)) {
- case OP_STWU:
- case OP_STBU:
- case OP_STHU:
- case OP_STFSU:
- case OP_STFDU:
- return true;
- case OP_STD: /* std or stdu */
- return (ppc_inst_val(inst) & 3) == 1;
- case OP_31:
- /* check minor opcode */
- switch ((ppc_inst_val(inst) >> 1) & 0x3ff) {
- case OP_31_XOP_STDUX:
- case OP_31_XOP_STWUX:
- case OP_31_XOP_STBUX:
- case OP_31_XOP_STHUX:
- case OP_31_XOP_STFSUX:
- case OP_31_XOP_STFDUX:
- return true;
- }
- }
- return false;
-}
+
/*
* do_page_fault error handling helpers
*/
@@ -267,57 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
return false;
}

-// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
-#define SIGFRAME_MAX_SIZE (4096 + 128)
-
-static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
- struct vm_area_struct *vma, unsigned int flags,
- bool *must_retry)
-{
- /*
- * N.B. The POWER/Open ABI allows programs to access up to
- * 288 bytes below the stack pointer.
- * The kernel signal delivery code writes a bit over 4KB
- * below the stack pointer (r1) before decrementing it.
- * The exec code can write slightly over 640kB to the stack
- * before setting the user r1. Thus we allow the stack to
- * expand to 1MB without further checks.
- */
- if (address + 0x100000 < vma->vm_end) {
- struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip;
- /* get user regs even if this fault is in kernel mode */
- struct pt_regs *uregs = current->thread.regs;
- if (uregs == NULL)
- return true;
-
- /*
- * A user-mode access to an address a long way below
- * the stack pointer is only valid if the instruction
- * is one which would update the stack pointer to the
- * address accessed if the instruction completed,
- * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
- * (or the byte, halfword, float or double forms).
- *
- * If we don't check this then any write to the area
- * between the last mapped region and the stack will
- * expand the stack rather than segfaulting.
- */
- if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
- return false;
-
- if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
- access_ok(nip, sizeof(*nip))) {
- struct ppc_inst inst;
-
- if (!probe_user_read_inst(&inst, nip))
- return !store_updates_sp(inst);
- *must_retry = true;
- }
- return true;
- }
- return false;
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
struct vm_area_struct *vma)
@@ -483,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_user = user_mode(regs);
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
- bool must_retry = false;
bool kprobe_fault = kprobe_page_fault(regs, 11);

if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
@@ -572,30 +488,15 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
vma = find_vma(mm, address);
if (unlikely(!vma))
return bad_area(regs, address);
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- return bad_area(regs, address);

- /* The stack is being expanded, check if it's valid */
- if (unlikely(bad_stack_expansion(regs, address, vma, flags,
- &must_retry))) {
- if (!must_retry)
+ if (unlikely(vma->vm_start > address)) {
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
return bad_area(regs, address);

- mmap_read_unlock(mm);
- if (fault_in_pages_readable((const char __user *)regs->nip,
- sizeof(unsigned int)))
- return bad_area_nosemaphore(regs, address);
- goto retry;
+ if (unlikely(expand_stack(vma, address)))
+ return bad_area(regs, address);
}

- /* Try to expand it */
- if (unlikely(expand_stack(vma, address)))
- return bad_area(regs, address);
-
-good_area:
-
#ifdef CONFIG_PPC_MEM_KEYS
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma)))
--
2.25.1

2020-07-27 08:37:41

by Gabriel Paubert

[permalink] [raw]
Subject: Re: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame

On Fri, Jul 24, 2020 at 07:25:25PM +1000, Michael Ellerman wrote:
> We have powerpc specific logic in our page fault handling to decide if
> an access to an unmapped address below the stack pointer should expand
> the stack VMA.
>
> The code was originally added in 2004 "ported from 2.4". The rough
> logic is that the stack is allowed to grow to 1MB with no extra
> checking. Over 1MB the access must be within 2048 bytes of the stack
> pointer, or be from a user instruction that updates the stack pointer.
>
> The 2048 byte allowance below the stack pointer is there to cover the
> 288 byte "red zone" as well as the "about 1.5kB" needed by the signal
> delivery code.
>
> Unfortunately since then the signal frame has expanded, and is now
> 4224 bytes on 64-bit kernels with transactional memory enabled.

Are there really users of transactional memory in the wild?

Just asking because Power10 removes TM, and Power9 has had some issues
with it AFAICT.

Getting rid of it (if possible) would result in smaller signal frames,
with simpler signal delivery code (probably slightly faster also).

Gabriel


2020-07-27 11:07:21

by Daniel Axtens

[permalink] [raw]
Subject: Re: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame

Hi Michael,

I have tested this with the test from the bug and it now seems to pass
fine. On that basis:

Tested-by: Daniel Axtens <[email protected]>

Thank you for coming up with a better solution than my gross hack!

Kind regards,
Daniel

> We have powerpc specific logic in our page fault handling to decide if
> an access to an unmapped address below the stack pointer should expand
> the stack VMA.
>
> The code was originally added in 2004 "ported from 2.4". The rough
> logic is that the stack is allowed to grow to 1MB with no extra
> checking. Over 1MB the access must be within 2048 bytes of the stack
> pointer, or be from a user instruction that updates the stack pointer.
>
> The 2048 byte allowance below the stack pointer is there to cover the
> 288 byte "red zone" as well as the "about 1.5kB" needed by the signal
> delivery code.
>
> Unfortunately since then the signal frame has expanded, and is now
> 4224 bytes on 64-bit kernels with transactional memory enabled. This
> means if a process has consumed more than 1MB of stack, and its stack
> pointer lies less than 4224 bytes from the next page boundary, signal
> delivery will fault when trying to expand the stack and the process
> will see a SEGV.
>
> The total size of the signal frame is the size of struct rt_sigframe
> (which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on
> 64-bit).
>
> The 2048 byte allowance was correct until 2008 as the signal frame
> was:
>
> struct rt_sigframe {
> struct ucontext uc; /* 0 1440 */
> /* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */
> long unsigned int _unused[2]; /* 1440 16 */
> unsigned int tramp[6]; /* 1456 24 */
> struct siginfo * pinfo; /* 1480 8 */
> void * puc; /* 1488 8 */
> struct siginfo info; /* 1496 128 */
> /* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */
> char abigap[288]; /* 1624 288 */
>
> /* size: 1920, cachelines: 15, members: 7 */
> /* padding: 8 */
> };
>
> 1920 + 128 = 2048
>
> Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore,
> ptrace and signal support") (Jul 2008) the signal frame expanded to
> 2304 bytes:
>
> struct rt_sigframe {
> struct ucontext uc; /* 0 1696 */ <--
> /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
> long unsigned int _unused[2]; /* 1696 16 */
> unsigned int tramp[6]; /* 1712 24 */
> struct siginfo * pinfo; /* 1736 8 */
> void * puc; /* 1744 8 */
> struct siginfo info; /* 1752 128 */
> /* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */
> char abigap[288]; /* 1880 288 */
>
> /* size: 2176, cachelines: 17, members: 7 */
> /* padding: 8 */
> };
>
> 2176 + 128 = 2304
>
> At this point we should have been exposed to the bug, though as far as
> I know it was never reported. I no longer have a system old enough to
> easily test on.
>
> Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a
> grow-down stack segment") caused our stack expansion code to never
> trigger, as there was always a VMA found for a write up to PAGE_SIZE
> below r1.
>
> That meant the bug was hidden as we continued to expand the signal
> frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory
> state to the signal context") (Feb 2013):
>
> struct rt_sigframe {
> struct ucontext uc; /* 0 1696 */
> /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
> struct ucontext uc_transact; /* 1696 1696 */ <--
> /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
> long unsigned int _unused[2]; /* 3392 16 */
> unsigned int tramp[6]; /* 3408 24 */
> struct siginfo * pinfo; /* 3432 8 */
> void * puc; /* 3440 8 */
> struct siginfo info; /* 3448 128 */
> /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
> char abigap[288]; /* 3576 288 */
>
> /* size: 3872, cachelines: 31, members: 8 */
> /* padding: 8 */
> /* last cacheline: 32 bytes */
> };
>
> 3872 + 128 = 4000
>
> And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit
> userspace to 512 bytes") (Feb 2014):
>
> struct rt_sigframe {
> struct ucontext uc; /* 0 1696 */
> /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */
> struct ucontext uc_transact; /* 1696 1696 */
> /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */
> long unsigned int _unused[2]; /* 3392 16 */
> unsigned int tramp[6]; /* 3408 24 */
> struct siginfo * pinfo; /* 3432 8 */
> void * puc; /* 3440 8 */
> struct siginfo info; /* 3448 128 */
> /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */
> char abigap[512]; /* 3576 512 */ <--
>
> /* size: 4096, cachelines: 32, members: 8 */
> /* padding: 8 */
> };
>
> 4096 + 128 = 4224
>
> Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard
> gap, between vmas") exposed us to the existing bug, because it changed
> the stack VMA to be the correct/real size, meaning our stack expansion
> code is now triggered.
>
> Fix it by increasing the allowance to 4224 bytes.
>
> Hard-coding 4224 is obviously unsafe against future expansions of the
> signal frame in the same way as the existing code. We can't easily use
> sizeof() because the signal frame structure is not in a header. We
> will either fix that, or rip out all the custom stack expansion
> checking logic entirely.
>
> Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support")
> Cc: [email protected] # v2.6.27+
> Reported-by: Tom Lane <[email protected]>
> Signed-off-by: Michael Ellerman <[email protected]>
> ---
>
> v2: Account for the extra 128 bytes of __SIGNAL_FRAMESIZE, making the
> total size 4224, as noticed by dja.
>
> See also https://bugzilla.kernel.org/show_bug.cgi?id=205183
> ---
> arch/powerpc/mm/fault.c | 7 +++++--
> 1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 641fc5f3d7dd..3ebb1792e636 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -267,6 +267,9 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
> return false;
> }
>
> +// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
> +#define SIGFRAME_MAX_SIZE (4096 + 128)
> +
> static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
> struct vm_area_struct *vma, unsigned int flags,
> bool *must_retry)
> @@ -274,7 +277,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
> /*
> * N.B. The POWER/Open ABI allows programs to access up to
> * 288 bytes below the stack pointer.
> - * The kernel signal delivery code writes up to about 1.5kB
> + * The kernel signal delivery code writes a bit over 4KB
> * below the stack pointer (r1) before decrementing it.
> * The exec code can write slightly over 640kB to the stack
> * before setting the user r1. Thus we allow the stack to
> @@ -299,7 +302,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
> * between the last mapped region and the stack will
> * expand the stack rather than segfaulting.
> */
> - if (address + 2048 >= uregs->gpr[1])
> + if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
> return false;
>
> if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
> --
> 2.25.1

2020-07-27 12:29:04

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v2 2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame

Gabriel Paubert <[email protected]> writes:
> On Fri, Jul 24, 2020 at 07:25:25PM +1000, Michael Ellerman wrote:
>> We have powerpc specific logic in our page fault handling to decide if
>> an access to an unmapped address below the stack pointer should expand
>> the stack VMA.
>>
>> The code was originally added in 2004 "ported from 2.4". The rough
>> logic is that the stack is allowed to grow to 1MB with no extra
>> checking. Over 1MB the access must be within 2048 bytes of the stack
>> pointer, or be from a user instruction that updates the stack pointer.
>>
>> The 2048 byte allowance below the stack pointer is there to cover the
>> 288 byte "red zone" as well as the "about 1.5kB" needed by the signal
>> delivery code.
>>
>> Unfortunately since then the signal frame has expanded, and is now
>> 4224 bytes on 64-bit kernels with transactional memory enabled.
>
> Are there really users of transactional memory in the wild?

Not many that I've heard of, but some.

Though anything that does use it needs to be written to fallback to
regular locking if TM is not available anyway.

> Just asking because Power10 removes TM, and Power9 has had some issues
> with it AFAICT.

It varies on different Power9 chip levels. For guests it should work.

> Getting rid of it (if possible) would result in smaller signal frames,
> with simpler signal delivery code (probably slightly faster also).

All the kernel code should be behind CONFIG_PPC_TRANSACTIONAL_MEM.

Deciding to disable that is really a distro decision.

In upstream we tend not to drop support for existing hardware while
people are still using it. But we could make a special case for TM,
because it's quite intrusive. I think we'd wait for a major distro to
ship without TM enabled before we did that though.

cheers

2020-07-27 14:03:22

by Daniel Axtens

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/mm: Remove custom stack expansion checking

Hi Michael,

I tested v1 of this. I ran the test from the bug with a range of stack
sizes, in a loop, for several hours and didn't see any crashes/signal
delivery failures.

I retested v2 for a few minutes just to be sure, and I ran stress-ng's
stack, stackmmap and bad-altstack stressors to make sure no obvious
kernel bugs were exposed. Nothing crashed.

All tests done on a P8 LE guest under KVM.

On that basis:

Tested-by: Daniel Axtens <[email protected]>

The more I look at this the less qualified I feel to Review it, but
certainly it looks better than my ugly hack from late last year.

Kind regards,
Daniel

> We have powerpc specific logic in our page fault handling to decide if
> an access to an unmapped address below the stack pointer should expand
> the stack VMA.
>
> The logic aims to prevent userspace from doing bad accesses below the
> stack pointer. However as long as the stack is < 1MB in size, we allow
> all accesses without further checks. Adding some debug I see that I
> can do a full kernel build and LTP run, and not a single process has
> used more than 1MB of stack. So for the majority of processes the
> logic never even fires.
>
> We also recently found a nasty bug in this code which could cause
> userspace programs to be killed during signal delivery. It went
> unnoticed presumably because most processes use < 1MB of stack.
>
> The generic mm code has also grown support for stack guard pages since
> this code was originally written, so the most heinous case of the
> stack expanding into other mappings is now handled for us.
>
> Finally although some other arches have special logic in this path,
> from what I can tell none of x86, arm64, arm and s390 impose any extra
> checks other than those in expand_stack().
>
> So drop our complicated logic and like other architectures just let
> the stack expand as long as its within the rlimit.
>
> Signed-off-by: Michael Ellerman <[email protected]>
> ---
> arch/powerpc/mm/fault.c | 109 ++--------------------------------------
> 1 file changed, 5 insertions(+), 104 deletions(-)
>
> v2: no change just rebased.
>
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 3ebb1792e636..925a7231abb3 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -42,39 +42,7 @@
> #include <asm/kup.h>
> #include <asm/inst.h>
>
> -/*
> - * Check whether the instruction inst is a store using
> - * an update addressing form which will update r1.
> - */
> -static bool store_updates_sp(struct ppc_inst inst)
> -{
> - /* check for 1 in the rA field */
> - if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1)
> - return false;
> - /* check major opcode */
> - switch (ppc_inst_primary_opcode(inst)) {
> - case OP_STWU:
> - case OP_STBU:
> - case OP_STHU:
> - case OP_STFSU:
> - case OP_STFDU:
> - return true;
> - case OP_STD: /* std or stdu */
> - return (ppc_inst_val(inst) & 3) == 1;
> - case OP_31:
> - /* check minor opcode */
> - switch ((ppc_inst_val(inst) >> 1) & 0x3ff) {
> - case OP_31_XOP_STDUX:
> - case OP_31_XOP_STWUX:
> - case OP_31_XOP_STBUX:
> - case OP_31_XOP_STHUX:
> - case OP_31_XOP_STFSUX:
> - case OP_31_XOP_STFDUX:
> - return true;
> - }
> - }
> - return false;
> -}
> +
> /*
> * do_page_fault error handling helpers
> */
> @@ -267,57 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
> return false;
> }
>
> -// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
> -#define SIGFRAME_MAX_SIZE (4096 + 128)
> -
> -static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
> - struct vm_area_struct *vma, unsigned int flags,
> - bool *must_retry)
> -{
> - /*
> - * N.B. The POWER/Open ABI allows programs to access up to
> - * 288 bytes below the stack pointer.
> - * The kernel signal delivery code writes a bit over 4KB
> - * below the stack pointer (r1) before decrementing it.
> - * The exec code can write slightly over 640kB to the stack
> - * before setting the user r1. Thus we allow the stack to
> - * expand to 1MB without further checks.
> - */
> - if (address + 0x100000 < vma->vm_end) {
> - struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip;
> - /* get user regs even if this fault is in kernel mode */
> - struct pt_regs *uregs = current->thread.regs;
> - if (uregs == NULL)
> - return true;
> -
> - /*
> - * A user-mode access to an address a long way below
> - * the stack pointer is only valid if the instruction
> - * is one which would update the stack pointer to the
> - * address accessed if the instruction completed,
> - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
> - * (or the byte, halfword, float or double forms).
> - *
> - * If we don't check this then any write to the area
> - * between the last mapped region and the stack will
> - * expand the stack rather than segfaulting.
> - */
> - if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
> - return false;
> -
> - if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
> - access_ok(nip, sizeof(*nip))) {
> - struct ppc_inst inst;
> -
> - if (!probe_user_read_inst(&inst, nip))
> - return !store_updates_sp(inst);
> - *must_retry = true;
> - }
> - return true;
> - }
> - return false;
> -}
> -
> #ifdef CONFIG_PPC_MEM_KEYS
> static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
> struct vm_area_struct *vma)
> @@ -483,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> int is_user = user_mode(regs);
> int is_write = page_fault_is_write(error_code);
> vm_fault_t fault, major = 0;
> - bool must_retry = false;
> bool kprobe_fault = kprobe_page_fault(regs, 11);
>
> if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
> @@ -572,30 +488,15 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> vma = find_vma(mm, address);
> if (unlikely(!vma))
> return bad_area(regs, address);
> - if (likely(vma->vm_start <= address))
> - goto good_area;
> - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
> - return bad_area(regs, address);
>
> - /* The stack is being expanded, check if it's valid */
> - if (unlikely(bad_stack_expansion(regs, address, vma, flags,
> - &must_retry))) {
> - if (!must_retry)
> + if (unlikely(vma->vm_start > address)) {
> + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
> return bad_area(regs, address);
>
> - mmap_read_unlock(mm);
> - if (fault_in_pages_readable((const char __user *)regs->nip,
> - sizeof(unsigned int)))
> - return bad_area_nosemaphore(regs, address);
> - goto retry;
> + if (unlikely(expand_stack(vma, address)))
> + return bad_area(regs, address);
> }
>
> - /* Try to expand it */
> - if (unlikely(expand_stack(vma, address)))
> - return bad_area(regs, address);
> -
> -good_area:
> -
> #ifdef CONFIG_PPC_MEM_KEYS
> if (unlikely(access_pkey_error(is_write, is_exec,
> (error_code & DSISR_KEYFAULT), vma)))
> --
> 2.25.1

2020-07-28 02:33:37

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/mm: Remove custom stack expansion checking

Daniel Axtens <[email protected]> writes:
> Hi Michael,
>
> I tested v1 of this. I ran the test from the bug with a range of stack
> sizes, in a loop, for several hours and didn't see any crashes/signal
> delivery failures.
>
> I retested v2 for a few minutes just to be sure, and I ran stress-ng's
> stack, stackmmap and bad-altstack stressors to make sure no obvious
> kernel bugs were exposed. Nothing crashed.
>
> All tests done on a P8 LE guest under KVM.
>
> On that basis:
>
> Tested-by: Daniel Axtens <[email protected]>

Thanks.

Always nice to have someone review my patches!

cheers

2020-07-30 12:53:46

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] selftests/powerpc: Add test of stack expansion logic

On Fri, 24 Jul 2020 19:25:24 +1000, Michael Ellerman wrote:
> We have custom stack expansion checks that it turns out are extremely
> badly tested and contain bugs, surprise. So add some tests that
> exercise the code and capture the current boundary conditions.
>
> The signal test currently fails on 64-bit kernels because the 2048
> byte allowance for the signal frame is too small, we will fix that in
> a subsequent patch.

Applied to powerpc/next.

[1/5] selftests/powerpc: Add test of stack expansion logic
https://git.kernel.org/powerpc/c/c9938a9dac95be7650218cdd8e9d1f882e7b5691
[2/5] powerpc: Allow 4224 bytes of stack expansion for the signal frame
https://git.kernel.org/powerpc/c/63dee5df43a31f3844efabc58972f0a206ca4534
[3/5] selftests/powerpc: Update the stack expansion test
https://git.kernel.org/powerpc/c/9ee571d84bf8cfdd587a1acbf3490ca90fc40c9d
[4/5] powerpc/mm: Remove custom stack expansion checking
https://git.kernel.org/powerpc/c/773b3e53df5b84e73bf64998e4019f50a6662ad1
[5/5] selftests/powerpc: Remove powerpc special cases from stack expansion test
https://git.kernel.org/powerpc/c/73da08f6966b81feb429af4fb3229da4cf21d6d9

cheers