2020-01-24 11:56:58

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 1/7] readdir: make user_access_begin() use the real access range

From: Linus Torvalds <[email protected]>

In commit 9f79b78ef744 ("Convert filldir[64]() from __put_user() to
unsafe_put_user()") I changed filldir to not do individual __put_user()
accesses, but instead use unsafe_put_user() surrounded by the proper
user_access_begin/end() pair.

That make them enormously faster on modern x86, where the STAC/CLAC
games make individual user accesses fairly heavy-weight.

However, the user_access_begin() range was not really the exact right
one, since filldir() has the unfortunate problem that it needs to not
only fill out the new directory entry, it also needs to fix up the
previous one to contain the proper file offset.

It's unfortunate, but the "d_off" field in "struct dirent" is _not_ the
file offset of the directory entry itself - it's the offset of the next
one. So we end up backfilling the offset in the previous entry as we
walk along.

But since x86 didn't really care about the exact range, and used to be
the only architecture that did anything fancy in user_access_begin() to
begin with, the filldir[64]() changes did something lazy, and even
commented on it:

/*
* Note! This range-checks 'previous' (which may be NULL).
* The real range was checked in getdents
*/
if (!user_access_begin(dirent, sizeof(*dirent)))
goto efault;

and it all worked fine.

But now 32-bit ppc is starting to also implement user_access_begin(),
and the fact that we faked the range to only be the (possibly not even
valid) previous directory entry becomes a problem, because ppc32 will
actually be using the range that is passed in for more than just "check
that it's user space".

This is a complete rewrite of Christophe's original patch.

By saving off the record length of the previous entry instead of a
pointer to it in the filldir data structures, we can simplify the range
check and the writing of the previous entry d_off field. No need for
any conditionals in the user accesses themselves, although we retain the
conditional EINTR checking for the "was this the first directory entry"
signal handling latency logic.

Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()")
Link: https://lore.kernel.org/lkml/a02d3426f93f7eb04960a4d9140902d278cab0bb.1579697910.git.christophe.leroy@c-s.fr/
Link: https://lore.kernel.org/lkml/408c90c4068b00ea8f1c41cca45b84ec23d4946b.1579783936.git.christophe.leroy@c-s.fr/
Reported-and-tested-by: Christophe Leroy <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Christophe Leroy <[email protected]>
---
v4: taken from Linus' tree
---
fs/readdir.c | 73 +++++++++++++++++++++++++---------------------------
1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/fs/readdir.c b/fs/readdir.c
index d26d5ea4de7b..d5ee72280c82 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -206,7 +206,7 @@ struct linux_dirent {
struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
- struct linux_dirent __user * previous;
+ int prev_reclen;
int count;
int error;
};
@@ -214,12 +214,13 @@ struct getdents_callback {
static int filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
- struct linux_dirent __user * dirent;
+ struct linux_dirent __user *dirent, *prev;
struct getdents_callback *buf =
container_of(ctx, struct getdents_callback, ctx);
unsigned long d_ino;
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
+ int prev_reclen;

buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
@@ -232,28 +233,24 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
buf->error = -EOVERFLOW;
return -EOVERFLOW;
}
- dirent = buf->previous;
- if (dirent && signal_pending(current))
+ prev_reclen = buf->prev_reclen;
+ if (prev_reclen && signal_pending(current))
return -EINTR;
-
- /*
- * Note! This range-checks 'previous' (which may be NULL).
- * The real range was checked in getdents
- */
- if (!user_access_begin(dirent, sizeof(*dirent)))
- goto efault;
- if (dirent)
- unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+ prev = (void __user *) dirent - prev_reclen;
+ if (!user_access_begin(prev, reclen + prev_reclen))
+ goto efault;
+
+ /* This might be 'dirent->d_off', but if so it will get overwritten */
+ unsafe_put_user(offset, &prev->d_off, efault_end);
unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();

- buf->previous = dirent;
- dirent = (void __user *)dirent + reclen;
- buf->current_dir = dirent;
+ buf->current_dir = (void __user *)dirent + reclen;
+ buf->prev_reclen = reclen;
buf->count -= reclen;
return 0;
efault_end:
@@ -267,7 +264,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
- struct linux_dirent __user * lastdirent;
struct getdents_callback buf = {
.ctx.actor = filldir,
.count = count,
@@ -285,8 +281,10 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
- lastdirent = buf.previous;
- if (lastdirent) {
+ if (buf.prev_reclen) {
+ struct linux_dirent __user * lastdirent;
+ lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
+
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
@@ -299,7 +297,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
- struct linux_dirent64 __user * previous;
+ int prev_reclen;
int count;
int error;
};
@@ -307,11 +305,12 @@ struct getdents_callback64 {
static int filldir64(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
- struct linux_dirent64 __user *dirent;
+ struct linux_dirent64 __user *dirent, *prev;
struct getdents_callback64 *buf =
container_of(ctx, struct getdents_callback64, ctx);
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
+ int prev_reclen;

buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
@@ -319,30 +318,27 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
- dirent = buf->previous;
- if (dirent && signal_pending(current))
+ prev_reclen = buf->prev_reclen;
+ if (prev_reclen && signal_pending(current))
return -EINTR;
-
- /*
- * Note! This range-checks 'previous' (which may be NULL).
- * The real range was checked in getdents
- */
- if (!user_access_begin(dirent, sizeof(*dirent)))
- goto efault;
- if (dirent)
- unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+ prev = (void __user *)dirent - prev_reclen;
+ if (!user_access_begin(prev, reclen + prev_reclen))
+ goto efault;
+
+ /* This might be 'dirent->d_off', but if so it will get overwritten */
+ unsafe_put_user(offset, &prev->d_off, efault_end);
unsafe_put_user(ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, &dirent->d_type, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();

- buf->previous = dirent;
- dirent = (void __user *)dirent + reclen;
- buf->current_dir = dirent;
+ buf->prev_reclen = reclen;
+ buf->current_dir = (void __user *)dirent + reclen;
buf->count -= reclen;
return 0;
+
efault_end:
user_access_end();
efault:
@@ -354,7 +350,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
unsigned int count)
{
struct fd f;
- struct linux_dirent64 __user * lastdirent;
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
.count = count,
@@ -372,9 +367,11 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
- lastdirent = buf.previous;
- if (lastdirent) {
+ if (buf.prev_reclen) {
+ struct linux_dirent64 __user * lastdirent;
typeof(lastdirent->d_off) d_off = buf.ctx.pos;
+
+ lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
if (__put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
--
2.25.0


2020-01-24 11:57:17

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 4/7] powerpc/32s: Drop NULL addr verification

NULL addr is a user address. Don't waste time checking it. If
someone tries to access it, it will SIGFAULT the same way as for
address 1, so no need to make it special.

The special case is when not doing a write, in that case we want
to drop the entire function. This is now handled by 'dir' param
and not by the nulity of 'to' anymore.

Also make beginning of prevent_user_access() similar
to beginning of allow_user_access(), and tell the compiler
that writing in kernel space or with a 0 length is unlikely

Signed-off-by: Christophe Leroy <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://lore.kernel.org/r/d79cb9f680f4e971e05262303103a4b94baa91ce.1579715466.git.christophe.leroy@c-s.fr
---
v4: taken from powerpc/merge-test
---
arch/powerpc/include/asm/book3s/32/kup.h | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 91c8f1d9bcee..de29fb752cca 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -113,7 +113,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user

addr = (__force u32)to;

- if (!addr || addr >= TASK_SIZE || !size)
+ if (unlikely(addr >= TASK_SIZE || !size))
return;

end = min(addr + size, TASK_SIZE);
@@ -124,16 +124,18 @@ static __always_inline void allow_user_access(void __user *to, const void __user
static __always_inline void prevent_user_access(void __user *to, const void __user *from,
u32 size, unsigned long dir)
{
- u32 addr = (__force u32)to;
- u32 end = min(addr + size, TASK_SIZE);
+ u32 addr, end;

BUILD_BUG_ON(!__builtin_constant_p(dir));
if (!(dir & KUAP_WRITE))
return;

- if (!addr || addr >= TASK_SIZE || !size)
+ addr = (__force u32)to;
+
+ if (unlikely(addr >= TASK_SIZE || !size))
return;

+ end = min(addr + size, TASK_SIZE);
current->thread.kuap = 0;
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}
--
2.25.0

2020-01-24 14:35:58

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 7/7] powerpc: Implement user_access_save() and user_access_restore()

Implement user_access_save() and user_access_restore()

On 8xx and radix:
- On save, get the value of the associated special register
then prevent user access.
- On restore, set back the saved value to the associated special
register.

On book3s/32:
- On save, get the value stored in current->thread.kuap and prevent
user access.
- On restore, regenerate address range from the stored value and
reopen read/write access for that range.

Signed-off-by: Christophe Leroy <[email protected]>
---
v4: new
---
arch/powerpc/include/asm/book3s/32/kup.h | 23 +++++++++++++++++++
.../powerpc/include/asm/book3s/64/kup-radix.h | 22 ++++++++++++++++++
arch/powerpc/include/asm/kup.h | 2 ++
arch/powerpc/include/asm/nohash/32/kup-8xx.h | 14 +++++++++++
arch/powerpc/include/asm/uaccess.h | 5 ++--
5 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 17e069291c72..3c0ba22dc360 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -153,6 +153,29 @@ static __always_inline void prevent_user_access(void __user *to, const void __us
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}

+static inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = current->thread.kuap;
+ unsigned long addr = flags & 0xf0000000;
+ unsigned long end = flags << 28;
+ void __user *to = (__force void __user *)addr;
+
+ if (flags)
+ prevent_user_access(to, to, end - addr, KUAP_READ_WRITE);
+
+ return flags;
+}
+
+static inline void restore_user_access(unsigned long flags)
+{
+ unsigned long addr = flags & 0xf0000000;
+ unsigned long end = flags << 28;
+ void __user *to = (__force void __user *)addr;
+
+ if (flags)
+ allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
+}
+
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index a0263e94df33..90dd3a3fc8c7 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -63,6 +63,14 @@
* because that would require an expensive read/modify write of the AMR.
*/

+static inline unsigned long get_kuap(void)
+{
+ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ return 0;
+
+ return mfspr(SPRN_AMR);
+}
+
static inline void set_kuap(unsigned long value)
{
if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
@@ -98,6 +106,20 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
set_kuap(AMR_KUAP_BLOCKED);
}

+static inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = get_kuap();
+
+ set_kuap(AMR_KUAP_BLOCKED);
+
+ return flags;
+}
+
+static inline void restore_user_access(unsigned long flags)
+{
+ set_kuap(flags);
+}
+
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index c3ce7e8ae9ea..92bcd1a26d73 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -55,6 +55,8 @@ static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline void prevent_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
+static inline unsigned long prevent_user_access_return(void) { return 0UL; }
+static inline void restore_user_access(unsigned long flags) { }
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1d70c80366fd..85ed2390fb99 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -46,6 +46,20 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}

+static inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = mfspr(SPRN_MD_AP);
+
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+
+ return flags;
+}
+
+static inline void restore_user_access(unsigned long flags)
+{
+ mtspr(SPRN_MD_AP, flags);
+}
+
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index af905d7fc1df..2f500debae21 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -465,9 +465,8 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
}
#define user_access_begin user_access_begin
#define user_access_end prevent_current_access_user
-
-static inline unsigned long user_access_save(void) { return 0UL; }
-static inline void user_access_restore(unsigned long flags) { }
+#define user_access_save prevent_user_access_return
+#define user_access_restore restore_user_access

#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
--
2.25.0