Some architectures like powerpc64 have the capability to separate
read access and write access protection.
For get_user() and copy_from_user(), powerpc64 opens read access only.
For put_user() and copy_to_user(), powerpc64 opens write access only.
But when using unsafe_get_user() or unsafe_put_user(),
user_access_begin() opens both read and write.
Other architectures, like powerpc book3s 32 bits, only allow write
access protection, and on this architecture protection is a heavy
operation as it requires locking/unlocking per 256 Mbytes segment.
On those architectures it is therefore desirable to do the unlocking
only for write access. (Note that book3s/32 ranges from very old
powermacs from the 90's with the powerpc 601 processor to modern
ADSL boxes with PowerQuicc II processors, so it is still worth
considering.)
In order to avoid any risk of an attacker tampering with variable
parameters passed to user_access_begin/end, thereby leaving user
access open or opening too much, it is preferable to use dedicated
static functions that can't be overridden.
Add user_read_access_begin() and user_read_access_end() to open
read access only.
Add user_write_access_begin() and user_write_access_end() to open
write access only.
When left undefined, those new access helpers default to the
existing user_access_begin() and user_access_end().
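For illustration, a read-only block using the new helpers would look as
follows (a minimal sketch, assuming <linux/uaccess.h>; the function and
its arguments are made up for the example):

static int read_user_value(const int __user *uptr, int *out)
{
	int val;

	/* Opens read access only where the architecture supports the
	 * split; elsewhere it falls back to user_access_begin(). */
	if (!user_read_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;
	unsafe_get_user(val, uptr, Efault);
	user_read_access_end();
	*out = val;
	return 0;
Efault:
	user_read_access_end();
	return -EFAULT;
}

Where an architecture doesn't define the split helpers, this compiles to
exactly what a user_access_begin()/user_access_end() block would.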
Signed-off-by: Christophe Leroy <[email protected]>
Link: https://patchwork.ozlabs.org/patch/1227926/
---
Resending this series as I mistakenly only sent it to the powerpc list
at the beginning of February (https://patchwork.ozlabs.org/patch/1233172/)
This series is based on the discussion we had in January, see
https://patchwork.ozlabs.org/patch/1227926/ . I tried to
take into account all remarks, especially @hpa's remark to use
a fixed API and not base the relocking on a magic id returned at
unlocking.
This series is needed to implement a selective lkdtm test exercising
powerpc64's independent read and write protection, see
https://patchwork.ozlabs.org/patch/1231765/
include/linux/uaccess.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 67f016010aad..9861c89f93be 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -378,6 +378,14 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
static inline unsigned long user_access_save(void) { return 0UL; }
static inline void user_access_restore(unsigned long flags) { }
#endif
+#ifndef user_write_access_begin
+#define user_write_access_begin user_access_begin
+#define user_write_access_end user_access_end
+#endif
+#ifndef user_read_access_begin
+#define user_read_access_begin user_access_begin
+#define user_read_access_end user_access_end
+#endif
#ifdef CONFIG_HARDENED_USERCOPY
void usercopy_warn(const char *name, const char *detail, bool to_user,
--
2.25.0
Add support for selective read or write user access with
user_read_access_begin/end and user_write_access_begin/end.
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/32/kup.h | 4 ++--
arch/powerpc/include/asm/kup.h | 14 +++++++++++++-
arch/powerpc/include/asm/uaccess.h | 22 ++++++++++++++++++++++
3 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c0ba22dc360..1617e73bee30 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -108,7 +108,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
u32 addr, end;
BUILD_BUG_ON(!__builtin_constant_p(dir));
- BUILD_BUG_ON(dir == KUAP_CURRENT);
+ BUILD_BUG_ON(dir & ~KUAP_READ_WRITE);
if (!(dir & KUAP_WRITE))
return;
@@ -131,7 +131,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us
BUILD_BUG_ON(!__builtin_constant_p(dir));
- if (dir == KUAP_CURRENT) {
+ if (dir & KUAP_CURRENT_WRITE) {
u32 kuap = current->thread.kuap;
if (unlikely(!kuap))
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 92bcd1a26d73..c745ee41ad66 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -10,7 +10,9 @@
* Use the current saved situation instead of the to/from/size params.
* Used on book3s/32
*/
-#define KUAP_CURRENT 4
+#define KUAP_CURRENT_READ 4
+#define KUAP_CURRENT_WRITE 8
+#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
#ifdef CONFIG_PPC64
#include <asm/book3s/64/kup-radix.h>
@@ -101,6 +103,16 @@ static inline void prevent_current_access_user(void)
prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT);
}
+static inline void prevent_current_read_from_user(void)
+{
+ prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ);
+}
+
+static inline void prevent_current_write_to_user(void)
+{
+ prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE);
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_KUAP_H_ */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 2f500debae21..4427d419eb1d 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -468,6 +468,28 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
#define user_access_save prevent_user_access_return
#define user_access_restore restore_user_access
+static __must_check inline bool
+user_read_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+ allow_read_from_user(ptr, len);
+ return true;
+}
+#define user_read_access_begin user_read_access_begin
+#define user_read_access_end prevent_current_read_from_user
+
+static __must_check inline bool
+user_write_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+ allow_write_to_user((void __user *)ptr, len);
+ return true;
+}
+#define user_write_access_begin user_write_access_begin
+#define user_write_access_end prevent_current_write_to_user
+
#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e)
--
2.25.0
When opening user access to only perform reads, only open read access.
When opening user access to only perform writes, only open write
access.
Signed-off-by: Christophe Leroy <[email protected]>
---
fs/readdir.c | 12 ++++++------
kernel/compat.c | 12 ++++++------
kernel/exit.c | 12 ++++++------
lib/strncpy_from_user.c | 4 ++--
lib/strnlen_user.c | 4 ++--
lib/usercopy.c | 6 +++---
6 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/fs/readdir.c b/fs/readdir.c
index de2eceffdee8..ed6aaad451aa 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -242,7 +242,7 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
return -EINTR;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
- if (!user_access_begin(prev, reclen + prev_reclen))
+ if (!user_write_access_begin(prev, reclen + prev_reclen))
goto efault;
/* This might be 'dirent->d_off', but if so it will get overwritten */
@@ -251,14 +251,14 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_access_end();
+ user_write_access_end();
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
buf->count -= reclen;
return 0;
efault_end:
- user_access_end();
+ user_write_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
@@ -327,7 +327,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
return -EINTR;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
- if (!user_access_begin(prev, reclen + prev_reclen))
+ if (!user_write_access_begin(prev, reclen + prev_reclen))
goto efault;
/* This might be 'dirent->d_off', but if so it will get overwritten */
@@ -336,7 +336,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, &dirent->d_type, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_access_end();
+ user_write_access_end();
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
@@ -344,7 +344,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
return 0;
efault_end:
- user_access_end();
+ user_write_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
diff --git a/kernel/compat.c b/kernel/compat.c
index 843dd17e6078..705ca7e418c6 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -199,7 +199,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!user_access_begin(umask, bitmap_size / 8))
+ if (!user_write_access_begin(umask, bitmap_size / 8))
return -EFAULT;
while (nr_compat_longs > 1) {
@@ -211,11 +211,11 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
}
if (nr_compat_longs)
unsafe_get_user(*mask, umask++, Efault);
- user_access_end();
+ user_read_access_end();
return 0;
Efault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
@@ -228,7 +228,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!user_access_begin(umask, bitmap_size / 8))
+ if (!user_read_access_begin(umask, bitmap_size / 8))
return -EFAULT;
while (nr_compat_longs > 1) {
@@ -239,10 +239,10 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
}
if (nr_compat_longs)
unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
- user_access_end();
+ user_write_access_end();
return 0;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index d70d47159640..61b2f7a85079 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1555,7 +1555,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
if (!infop)
return err;
- if (!user_access_begin(infop, sizeof(*infop)))
+ if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1564,10 +1564,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
- user_access_end();
+ user_write_access_end();
return err;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
@@ -1682,7 +1682,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
if (!infop)
return err;
- if (!user_access_begin(infop, sizeof(*infop)))
+ if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1691,10 +1691,10 @@ COMPAT_SYSCALL_DEFINE5(waitid,
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
- user_access_end();
+ user_write_access_end();
return err;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
#endif
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 706020b06617..b90ec550183a 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -116,9 +116,9 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
kasan_check_write(dst, count);
check_object_size(dst, count, false);
- if (user_access_begin(src, max)) {
+ if (user_read_access_begin(src, max)) {
retval = do_strncpy_from_user(dst, src, count, max);
- user_access_end();
+ user_read_access_end();
return retval;
}
}
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 41670d4a5816..1616710b8a82 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -109,9 +109,9 @@ long strnlen_user(const char __user *str, long count)
if (max > count)
max = count;
- if (user_access_begin(str, max)) {
+ if (user_read_access_begin(str, max)) {
retval = do_strnlen_user(str, count, max);
- user_access_end();
+ user_read_access_end();
return retval;
}
}
diff --git a/lib/usercopy.c b/lib/usercopy.c
index cbb4d9ec00f2..ca2a697a2061 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -58,7 +58,7 @@ int check_zeroed_user(const void __user *from, size_t size)
from -= align;
size += align;
- if (!user_access_begin(from, size))
+ if (!user_read_access_begin(from, size))
return -EFAULT;
unsafe_get_user(val, (unsigned long __user *) from, err_fault);
@@ -79,10 +79,10 @@ int check_zeroed_user(const void __user *from, size_t size)
val &= aligned_byte_mask(size);
done:
- user_access_end();
+ user_read_access_end();
return (val == 0);
err_fault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
EXPORT_SYMBOL(check_zeroed_user);
--
2.25.0
i915_gem_execbuffer2_ioctl() uses user_access_begin() only to
perform unsafe_put_user(), so use user_write_access_begin() in
order to open write access only.
Signed-off-by: Christophe Leroy <[email protected]>
---
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 7643a30ba4cd..4be8205a70b6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1611,14 +1611,14 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
* happened we would make the mistake of assuming that the
* relocations were valid.
*/
- if (!user_access_begin(urelocs, size))
+ if (!user_write_access_begin(urelocs, size))
goto end;
for (copied = 0; copied < nreloc; copied++)
unsafe_put_user(-1,
&urelocs[copied].presumed_offset,
end_user);
- user_access_end();
+ user_write_access_end();
eb->exec[i].relocs_ptr = (uintptr_t)relocs;
}
@@ -1626,7 +1626,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
return 0;
end_user:
- user_access_end();
+ user_write_access_end();
end:
kvfree(relocs);
err = -EFAULT;
@@ -2991,7 +2991,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
* And this range already got effectively checked earlier
* when we did the "copy_from_user()" above.
*/
- if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+ if (!user_write_access_begin(user_exec_list,
+ count * sizeof(*user_exec_list)))
goto end;
for (i = 0; i < args->buffer_count; i++) {
@@ -3005,7 +3006,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
end_user);
}
end_user:
- user_access_end();
+ user_write_access_end();
end:;
}
--
2.25.0
On Thu, Apr 02, 2020 at 07:34:17AM +0000, Christophe Leroy wrote:
> [...]
> diff --git a/kernel/compat.c b/kernel/compat.c
> index 843dd17e6078..705ca7e418c6 100644
> --- a/kernel/compat.c
> +++ b/kernel/compat.c
> @@ -199,7 +199,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
> bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
> nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
>
> - if (!user_access_begin(umask, bitmap_size / 8))
> + if (!user_write_access_begin(umask, bitmap_size / 8))
This looks mismatched: should be user_read_access_begin()?
> return -EFAULT;
>
> while (nr_compat_longs > 1) {
> @@ -211,11 +211,11 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
> }
> if (nr_compat_longs)
> unsafe_get_user(*mask, umask++, Efault);
> - user_access_end();
> + user_read_access_end();
> return 0;
>
> Efault:
> - user_access_end();
> + user_read_access_end();
> return -EFAULT;
> }
(These correctly end read access.)
>
> @@ -228,7 +228,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
> bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
> nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
>
> - if (!user_access_begin(umask, bitmap_size / 8))
> + if (!user_read_access_begin(umask, bitmap_size / 8))
And ..._write_... here?
> return -EFAULT;
>
> while (nr_compat_longs > 1) {
> @@ -239,10 +239,10 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
> }
> if (nr_compat_longs)
> unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
> - user_access_end();
> + user_write_access_end();
> return 0;
> Efault:
> - user_access_end();
> + user_write_access_end();
> return -EFAULT;
> }
(These correctly end write access.)
All the others look correct. With the above fixed:
Reviewed-by: Kees Cook <[email protected]>
--
Kees Cook
On Thu, Apr 02, 2020 at 07:34:18AM +0000, Christophe Leroy wrote:
> i915_gem_execbuffer2_ioctl() uses user_access_begin() only to
> perform unsafe_put_user(), so use user_write_access_begin() in
> order to open write access only.
>
> Signed-off-by: Christophe Leroy <[email protected]>
Why is this split from the other conversions?
Reviewed-by: Kees Cook <[email protected]>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 11 ++++++-----
> 1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 7643a30ba4cd..4be8205a70b6 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1611,14 +1611,14 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
> * happened we would make the mistake of assuming that the
> * relocations were valid.
> */
> - if (!user_access_begin(urelocs, size))
> + if (!user_write_access_begin(urelocs, size))
> goto end;
>
> for (copied = 0; copied < nreloc; copied++)
> unsafe_put_user(-1,
> &urelocs[copied].presumed_offset,
> end_user);
> - user_access_end();
> + user_write_access_end();
>
> eb->exec[i].relocs_ptr = (uintptr_t)relocs;
> }
> @@ -1626,7 +1626,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
> return 0;
>
> end_user:
> - user_access_end();
> + user_write_access_end();
> end:
> kvfree(relocs);
> err = -EFAULT;
> @@ -2991,7 +2991,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> * And this range already got effectively checked earlier
> * when we did the "copy_from_user()" above.
> */
> - if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
> + if (!user_write_access_begin(user_exec_list,
> + count * sizeof(*user_exec_list)))
> goto end;
>
> for (i = 0; i < args->buffer_count; i++) {
> @@ -3005,7 +3006,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> end_user);
> }
> end_user:
> - user_access_end();
> + user_write_access_end();
> end:;
> }
>
> --
> 2.25.0
>
--
Kees Cook
On Thu, Apr 02, 2020 at 07:34:19AM +0000, Christophe Leroy wrote:
> Add support for selective read or write user access with
> user_read_access_begin/end and user_write_access_begin/end.
>
> Signed-off-by: Christophe Leroy <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
-Kees
> ---
> arch/powerpc/include/asm/book3s/32/kup.h | 4 ++--
> arch/powerpc/include/asm/kup.h | 14 +++++++++++++-
> arch/powerpc/include/asm/uaccess.h | 22 ++++++++++++++++++++++
> 3 files changed, 37 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
> index 3c0ba22dc360..1617e73bee30 100644
> --- a/arch/powerpc/include/asm/book3s/32/kup.h
> +++ b/arch/powerpc/include/asm/book3s/32/kup.h
> @@ -108,7 +108,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
> u32 addr, end;
>
> BUILD_BUG_ON(!__builtin_constant_p(dir));
> - BUILD_BUG_ON(dir == KUAP_CURRENT);
> + BUILD_BUG_ON(dir & ~KUAP_READ_WRITE);
>
> if (!(dir & KUAP_WRITE))
> return;
> @@ -131,7 +131,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us
>
> BUILD_BUG_ON(!__builtin_constant_p(dir));
>
> - if (dir == KUAP_CURRENT) {
> + if (dir & KUAP_CURRENT_WRITE) {
> u32 kuap = current->thread.kuap;
>
> if (unlikely(!kuap))
> diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
> index 92bcd1a26d73..c745ee41ad66 100644
> --- a/arch/powerpc/include/asm/kup.h
> +++ b/arch/powerpc/include/asm/kup.h
> @@ -10,7 +10,9 @@
> * Use the current saved situation instead of the to/from/size params.
> * Used on book3s/32
> */
> -#define KUAP_CURRENT 4
> +#define KUAP_CURRENT_READ 4
> +#define KUAP_CURRENT_WRITE 8
> +#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
>
> #ifdef CONFIG_PPC64
> #include <asm/book3s/64/kup-radix.h>
> @@ -101,6 +103,16 @@ static inline void prevent_current_access_user(void)
> prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT);
> }
>
> +static inline void prevent_current_read_from_user(void)
> +{
> + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ);
> +}
> +
> +static inline void prevent_current_write_to_user(void)
> +{
> + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE);
> +}
> +
> #endif /* !__ASSEMBLY__ */
>
> #endif /* _ASM_POWERPC_KUAP_H_ */
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 2f500debae21..4427d419eb1d 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -468,6 +468,28 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
> #define user_access_save prevent_user_access_return
> #define user_access_restore restore_user_access
>
> +static __must_check inline bool
> +user_read_access_begin(const void __user *ptr, size_t len)
> +{
> + if (unlikely(!access_ok(ptr, len)))
> + return false;
> + allow_read_from_user(ptr, len);
> + return true;
> +}
> +#define user_read_access_begin user_read_access_begin
> +#define user_read_access_end prevent_current_read_from_user
> +
> +static __must_check inline bool
> +user_write_access_begin(const void __user *ptr, size_t len)
> +{
> + if (unlikely(!access_ok(ptr, len)))
> + return false;
> + allow_write_to_user((void __user *)ptr, len);
> + return true;
> +}
> +#define user_write_access_begin user_write_access_begin
> +#define user_write_access_end prevent_current_write_to_user
> +
> #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
> #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
> #define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e)
> --
> 2.25.0
>
--
Kees Cook
On Thu, Apr 02, 2020 at 07:34:16AM +0000, Christophe Leroy wrote:
> Some architectures like powerpc64 have the capability to separate
> read access and write access protection.
> For get_user() and copy_from_user(), powerpc64 opens read access only.
> For put_user() and copy_to_user(), powerpc64 opens write access only.
> But when using unsafe_get_user() or unsafe_put_user(),
> user_access_begin() opens both read and write.
>
> Other architectures, like powerpc book3s 32 bits, only allow write
> access protection, and on this architecture protection is a heavy
> operation as it requires locking/unlocking per 256 Mbytes segment.
> On those architectures it is therefore desirable to do the unlocking
> only for write access. (Note that book3s/32 ranges from very old
> powermacs from the 90's with the powerpc 601 processor to modern
> ADSL boxes with PowerQuicc II processors, so it is still worth
> considering.)
>
> In order to avoid any risk of an attacker tampering with variable
> parameters passed to user_access_begin/end, thereby leaving user
> access open or opening too much, it is preferable to use dedicated
> static functions that can't be overridden.
>
> Add user_read_access_begin() and user_read_access_end() to open
> read access only.
>
> Add user_write_access_begin() and user_write_access_end() to open
> write access only.
>
> When left undefined, those new access helpers default to the
> existing user_access_begin() and user_access_end().
>
> Signed-off-by: Christophe Leroy <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
-Kees
> Link: https://patchwork.ozlabs.org/patch/1227926/
> ---
> Resending this series as I mistakenly only sent it to the powerpc list
> at the beginning of February (https://patchwork.ozlabs.org/patch/1233172/)
>
> This series is based on the discussion we had in January, see
> https://patchwork.ozlabs.org/patch/1227926/ . I tried to
> take into account all remarks, especially @hpa's remark to use
> a fixed API and not base the relocking on a magic id returned at
> unlocking.
>
> This series is needed to implement a selective lkdtm test exercising
> powerpc64's independent read and write protection, see
> https://patchwork.ozlabs.org/patch/1231765/
>
> include/linux/uaccess.h | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 67f016010aad..9861c89f93be 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -378,6 +378,14 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
> static inline unsigned long user_access_save(void) { return 0UL; }
> static inline void user_access_restore(unsigned long flags) { }
> #endif
> +#ifndef user_write_access_begin
> +#define user_write_access_begin user_access_begin
> +#define user_write_access_end user_access_end
> +#endif
> +#ifndef user_read_access_begin
> +#define user_read_access_begin user_access_begin
> +#define user_read_access_end user_access_end
> +#endif
>
> #ifdef CONFIG_HARDENED_USERCOPY
> void usercopy_warn(const char *name, const char *detail, bool to_user,
> --
> 2.25.0
>
--
Kees Cook
On 02/04/2020 09:52, Kees Cook wrote:
> On Thu, Apr 02, 2020 at 07:34:18AM +0000, Christophe Leroy wrote:
>> i915_gem_execbuffer2_ioctl() uses user_access_begin() only to
>> perform unsafe_put_user(), so use user_write_access_begin() in
>> order to open write access only.
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>
> Why is this split from the other conversions?
I split it from the others because this one is in drivers while the
other ones are in the core part of the kernel.
Would it be better to squash it into the previous patch?
Christophe
>
> Reviewed-by: Kees Cook <[email protected]>
>
>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 11 ++++++-----
>> 1 file changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 7643a30ba4cd..4be8205a70b6 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -1611,14 +1611,14 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
>> * happened we would make the mistake of assuming that the
>> * relocations were valid.
>> */
>> - if (!user_access_begin(urelocs, size))
>> + if (!user_write_access_begin(urelocs, size))
>> goto end;
>>
>> for (copied = 0; copied < nreloc; copied++)
>> unsafe_put_user(-1,
>> &urelocs[copied].presumed_offset,
>> end_user);
>> - user_access_end();
>> + user_write_access_end();
>>
>> eb->exec[i].relocs_ptr = (uintptr_t)relocs;
>> }
>> @@ -1626,7 +1626,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
>> return 0;
>>
>> end_user:
>> - user_access_end();
>> + user_write_access_end();
>> end:
>> kvfree(relocs);
>> err = -EFAULT;
>> @@ -2991,7 +2991,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>> * And this range already got effectively checked earlier
>> * when we did the "copy_from_user()" above.
>> */
>> - if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
>> + if (!user_write_access_begin(user_exec_list,
>> + count * sizeof(*user_exec_list)))
>> goto end;
>>
>> for (i = 0; i < args->buffer_count; i++) {
>> @@ -3005,7 +3006,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>> end_user);
>> }
>> end_user:
>> - user_access_end();
>> + user_write_access_end();
>> end:;
>> }
>>
>> --
>> 2.25.0
>>
>
On 02/04/2020 09:51, Kees Cook wrote:
> On Thu, Apr 02, 2020 at 07:34:17AM +0000, Christophe Leroy wrote:
>> [...]
>> diff --git a/kernel/compat.c b/kernel/compat.c
>> index 843dd17e6078..705ca7e418c6 100644
>> --- a/kernel/compat.c
>> +++ b/kernel/compat.c
>> @@ -199,7 +199,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
>> bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
>> nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
>>
>> - if (!user_access_begin(umask, bitmap_size / 8))
>> + if (!user_write_access_begin(umask, bitmap_size / 8))
>
> This looks mismatched: should be user_read_access_begin()?
Oops, looks like a copy/paste/modify where I modified the original
instead of the copy.
>
>> return -EFAULT;
>>
>> while (nr_compat_longs > 1) {
>> @@ -211,11 +211,11 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
>> }
>> if (nr_compat_longs)
>> unsafe_get_user(*mask, umask++, Efault);
>> - user_access_end();
>> + user_read_access_end();
>> return 0;
>>
>> Efault:
>> - user_access_end();
>> + user_read_access_end();
>> return -EFAULT;
>> }
>
> (These correctly end read access.)
>
>>
>> @@ -228,7 +228,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
>> bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
>> nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
>>
>> - if (!user_access_begin(umask, bitmap_size / 8))
>> + if (!user_read_access_begin(umask, bitmap_size / 8))
>
> And ..._write_... here?
>
>> return -EFAULT;
>>
>> while (nr_compat_longs > 1) {
>> @@ -239,10 +239,10 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
>> }
>> if (nr_compat_longs)
>> unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
>> - user_access_end();
>> + user_write_access_end();
>> return 0;
>> Efault:
>> - user_access_end();
>> + user_write_access_end();
>> return -EFAULT;
>> }
>
> (These correctly end write access.)
>
>
> All the others look correct. With the above fixed:
>
> Reviewed-by: Kees Cook <[email protected]>
>
Thanks
Christophe
On Thu, Apr 02, 2020 at 07:34:16AM +0000, Christophe Leroy wrote:
> Some architectures like powerpc64 have the capability to separate
> read access and write access protection.
> For get_user() and copy_from_user(), powerpc64 opens read access only.
> For put_user() and copy_to_user(), powerpc64 opens write access only.
> But when using unsafe_get_user() or unsafe_put_user(),
> user_access_begin() opens both read and write.
>
> Other architectures, like powerpc book3s 32 bits, only allow write
> access protection, and on this architecture protection is a heavy
> operation as it requires locking/unlocking per 256 Mbytes segment.
> On those architectures it is therefore desirable to do the unlocking
> only for write access. (Note that book3s/32 ranges from very old
> powermacs from the 90's with the powerpc 601 processor to modern
> ADSL boxes with PowerQuicc II processors, so it is still worth
> considering.)
>
> In order to avoid any risk of an attacker tampering with variable
> parameters passed to user_access_begin/end, thereby leaving user
> access open or opening too much, it is preferable to use dedicated
> static functions that can't be overridden.
>
> Add user_read_access_begin() and user_read_access_end() to open
> read access only.
>
> Add user_write_access_begin() and user_write_access_end() to open
> write access only.
>
> When left undefined, those new access helpers default to the
> existing user_access_begin() and user_access_end().
The only problem I have is that we'd better choose the calling
conventions that work for other architectures as well.
AFAICS, aside of ppc and x86 we have (at least) this:
arm:
unsigned int __ua_flags = uaccess_save_and_enable();
...
uaccess_restore(__ua_flags);
arm64:
uaccess_enable_not_uao();
...
uaccess_disable_not_uao();
riscv:
__enable_user_access();
...
__disable_user_access();
s390/mvc:
old_fs = enable_sacf_uaccess();
...
disable_sacf_uaccess(old_fs);
arm64 and riscv are easy - they map well on what we have now.
The interesting ones are ppc, arm and s390.
You want to specify the kind of access; OK, but... it's not just read
vs. write - there's read-write as well. AFAICS, there are 3 users of
that:
* copy_in_user()
* arch_futex_atomic_op_inuser()
* futex_atomic_cmpxchg_inatomic()
The former is of dubious utility (all users outside of arch are in
the badly done compat code), but the other two are not going to go
away.
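(As an illustration of why those need a combined window: both the read
and the write happen inside one open section. A sketch only - the
helper below is made up and elides the arch-level atomics that the
futex ops actually rely on:)

static int increment_user_word(u32 __user *uaddr)
{
	u32 val;

	/* Read-modify-write of a user word: a read-only or write-only
	 * window would fault on one of the two accesses. */
	if (!user_access_begin(uaddr, sizeof(*uaddr)))
		return -EFAULT;
	unsafe_get_user(val, uaddr, Efault);
	unsafe_put_user(val + 1, uaddr, Efault);
	user_access_end();
	return 0;
Efault:
	user_access_end();
	return -EFAULT;
}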
What should we do about that? Do we prohibit such blocks outside
of arch?
What should we do about arm and s390? There we want a cookie passed
from beginning of block to its end; should that be a return value?
And at least on arm that thing nests (see e.g. __clear_user_memset()
there), so "stash that cookie in current->something" is not a solution...
Folks, let's sort that out while we still have few users of that
interface; changing the calling conventions later will be much harder.
Comments?
On Thu, Apr 02, 2020 at 07:03:28PM +0200, Christophe Leroy wrote:
> > What should we do about arm and s390? There we want a cookie passed
> > from beginning of block to its end; should that be a return value?
>
> That was the way I implemented it in January, see
> https://patchwork.ozlabs.org/patch/1227926/
>
> There was some discussion around that and most noticeable was:
>
> H. Peter (hpa) said about it: "I have *deep* concern with carrying state in
> a "key" variable: it's a direct attack vector for a crowbar attack,
> especially since it is by definition live inside a user access region."
I share this concern -- we want to keep user/kernel access as static as
possible. It should be provable with static analysis, etc (e.g. objtool
does this already for x86).
Since this doesn't disrupt existing R+W access, I'd prefer the design of
this series as-is.
--
Kees Cook
On 02/04/2020 18:29, Al Viro wrote:
> On Thu, Apr 02, 2020 at 07:34:16AM +0000, Christophe Leroy wrote:
>> Some architectures like powerpc64 have the capability to separate
>> read access and write access protection.
>> For get_user() and copy_from_user(), powerpc64 opens read access only.
>> For put_user() and copy_to_user(), powerpc64 opens write access only.
>> But when using unsafe_get_user() or unsafe_put_user(),
>> user_access_begin() opens both read and write.
>>
>> Other architectures, like powerpc book3s 32 bits, only allow write
>> access protection, and on this architecture protection is a heavy
>> operation as it requires locking/unlocking per 256 Mbytes segment.
>> On those architectures it is therefore desirable to do the unlocking
>> only for write access. (Note that book3s/32 ranges from very old
>> powermacs from the 90's with the powerpc 601 processor to modern
>> ADSL boxes with PowerQuicc II processors, so it is still worth
>> considering.)
>>
>> In order to avoid any risk of an attacker tampering with variable
>> parameters passed to user_access_begin/end, thereby leaving user
>> access open or opening too much, it is preferable to use dedicated
>> static functions that can't be overridden.
>>
>> Add user_read_access_begin() and user_read_access_end() to open
>> read access only.
>>
>> Add user_write_access_begin() and user_write_access_end() to open
>> write access only.
>>
>> When left undefined, those new access helpers default to the
>> existing user_access_begin() and user_access_end().
>
> The only problem I have is that we'd better choose the calling
> conventions that work for other architectures as well.
>
> AFAICS, aside of ppc and x86 we have (at least) this:
> arm:
> unsigned int __ua_flags = uaccess_save_and_enable();
> ...
> uaccess_restore(__ua_flags);
> arm64:
> uaccess_enable_not_uao();
> ...
> uaccess_disable_not_uao();
> riscv:
> __enable_user_access();
> ...
> __disable_user_access();
> s390/mvc:
> old_fs = enable_sacf_uaccess();
> ...
> disable_sacf_uaccess(old_fs);
>
> arm64 and riscv are easy - they map well on what we have now.
> The interesting ones are ppc, arm and s390.
>
> You want to specify the kind of access; OK, but... it's not just read
> vs. write - there's read-write as well. AFAICS, there are 3 users of
> that:
> * copy_in_user()
> * arch_futex_atomic_op_inuser()
> * futex_atomic_cmpxchg_inatomic()
> The former is of dubious utility (all users outside of arch are in
> the badly done compat code), but the other two are not going to go
> away.
user_access_begin() grants both read and write.
This patch adds user_read_access_begin() and user_write_access_begin()
but it doesn't remove user_access_begin()
>
> What should we do about that? Do we prohibit such blocks outside
> of arch?
>
> What should we do about arm and s390? There we want a cookie passed
> from beginning of block to its end; should that be a return value?
That was the way I implemented it in January, see
https://patchwork.ozlabs.org/patch/1227926/
There was some discussion around that and most noticeable was:
H. Peter (hpa) said about it: "I have *deep* concern with carrying state
in a "key" variable: it's a direct attack vector for a crowbar attack,
especially since it is by definition live inside a user access region."
>
> And at least on arm that thing nests (see e.g. __clear_user_memset()
> there), so "stash that cookie in current->something" is not a solution...
>
> Folks, let's sort that out while we still have few users of that
> interface; changing the calling conventions later will be much harder.
> Comments?
>
This patch minimises the change by just adding user_read_access_begin()
and user_write_access_begin(), keeping the same parameters as the
existing user_access_begin().
So I can come back to a mix of this patch and the January version if
that corresponds to everyone's view; it will also be a bit easier for
powerpc (especially book3s/32). But that didn't seem to be the expected
direction back when we discussed it in January.
Christophe
On Thu, Apr 02, 2020 at 07:03:28PM +0200, Christophe Leroy wrote:
> user_access_begin() grants both read and write.
>
> This patch adds user_read_access_begin() and user_write_access_begin() but
> it doesn't remove user_access_begin()
Ouch... So the most generic name is for the rarest case?
> > What should we do about that? Do we prohibit such blocks outside
> > of arch?
> >
> > What should we do about arm and s390? There we want a cookie passed
> > from beginning of block to its end; should that be a return value?
>
> That was the way I implemented it in January, see
> https://patchwork.ozlabs.org/patch/1227926/
>
> There was some discussion around that and most noticeable was:
>
> H. Peter (hpa) said about it: "I have *deep* concern with carrying state in
> a "key" variable: it's a direct attack vector for a crowbar attack,
> especially since it is by definition live inside a user access region."
> This patch minimises the change by just adding user_read_access_begin() and
> user_write_access_begin() keeping the same parameters as the existing
> user_access_begin().
Umm... What about the arm situation? The same concerns would apply there,
wouldn't they? Currently we have
static __always_inline unsigned int uaccess_save_and_enable(void)
{
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
unsigned int old_domain = get_domain();
/* Set the current domain access to permit user accesses */
set_domain((old_domain & ~domain_mask(DOMAIN_USER)) |
domain_val(DOMAIN_USER, DOMAIN_CLIENT));
return old_domain;
#else
return 0;
#endif
}
and
static __always_inline void uaccess_restore(unsigned int flags)
{
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/* Restore the user access mask */
set_domain(flags);
#endif
}
How much do we need nesting on those, anyway? rmk?
On Thu, Apr 02, 2020 at 06:50:32PM +0100, Al Viro wrote:
> On Thu, Apr 02, 2020 at 07:03:28PM +0200, Christophe Leroy wrote:
>
> > user_access_begin() grants both read and write.
> >
> > This patch adds user_read_access_begin() and user_write_access_begin() but
> > it doesn't remove user_access_begin()
>
> Ouch... So the most generic name is for the rarest case?
>
> > > What should we do about that? Do we prohibit such blocks outside
> > > of arch?
> > >
> > > What should we do about arm and s390? There we want a cookie passed
> > > from beginning of block to its end; should that be a return value?
> >
> > That was the way I implemented it in January, see
> > https://patchwork.ozlabs.org/patch/1227926/
> >
> > There was some discussion around that and most noticeable was:
> >
> > H. Peter (hpa) said about it: "I have *deep* concern with carrying state in
> > a "key" variable: it's a direct attack vector for a crowbar attack,
> > especially since it is by definition live inside a user access region."
>
> > This patch minimises the change by just adding user_read_access_begin() and
> > user_write_access_begin() keeping the same parameters as the existing
> > user_access_begin().
>
> Umm... What about the arm situation? The same concerns would apply there,
> wouldn't they? Currently we have
> static __always_inline unsigned int uaccess_save_and_enable(void)
> {
> #ifdef CONFIG_CPU_SW_DOMAIN_PAN
> unsigned int old_domain = get_domain();
>
> /* Set the current domain access to permit user accesses */
> set_domain((old_domain & ~domain_mask(DOMAIN_USER)) |
> domain_val(DOMAIN_USER, DOMAIN_CLIENT));
>
> return old_domain;
> #else
> return 0;
> #endif
> }
> and
> static __always_inline void uaccess_restore(unsigned int flags)
> {
> #ifdef CONFIG_CPU_SW_DOMAIN_PAN
> /* Restore the user access mask */
> set_domain(flags);
> #endif
> }
>
> How much do we need nesting on those, anyway? rmk?
Yup, I think it's a weakness of the ARM implementation and I'd like to
not extend it further. AFAIK we should never nest, but I would not be
surprised at all if we did.
If we were looking at a design goal for all architectures, I'd like
to be doing what the public PaX patchset did for their memory access
switching, which is to alarm if calling into "enable" found the access
already enabled, etc. Such a condition would show an unexpected nesting
(like we've seen with similar constructs with set_fs() not getting reset
during an exception handler, etc etc).
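(A sketch of that kind of check - the names below are hypothetical,
since no generic primitive for this exists today:)

static inline void enable_user_access_checked(void)
{
	/* If access is already open, a previous section was never
	 * closed, e.g. lost across an exception path. */
	WARN_ON_ONCE(user_access_is_enabled());	/* hypothetical probe */
	arch_enable_user_access();		/* hypothetical enable */
}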
--
Kees Cook
On 02/04/2020 19:50, Al Viro wrote:
> On Thu, Apr 02, 2020 at 07:03:28PM +0200, Christophe Leroy wrote:
>
>> user_access_begin() grants both read and write.
>>
>> This patch adds user_read_access_begin() and user_write_access_begin() but
>> it doesn't remove user_access_begin()
>
> Ouch... So the most generic name is for the rarest case?
>
I can add another patch at the end of the series to rename
user_access_begin() to user_full_access_begin().
Would that suit you?
Christophe
On Thu, Apr 2, 2020 at 11:36 AM Kees Cook <[email protected]> wrote:
>
> Yup, I think it's a weakness of the ARM implementation and I'd like to
> not extend it further. AFAIK we should never nest, but I would not be
> surprised at all if we did.
Well, at least the user_access_begin/end() sections can't nest. objtool
verifies and warns about that on x86.
> If we were looking at a design goal for all architectures, I'd like
> to be doing what the public PaX patchset
We already do better than PaX ever did. Seriously. Mainline has long
since passed their hacky garbage.
Plus PaX and grsecurity should be actively shunned. Don't look at it,
don't use it, and tell everybody you know to not use that shit.
Linus
On Thu, Apr 02, 2020 at 12:26:52PM -0700, Linus Torvalds wrote:
> On Thu, Apr 2, 2020 at 11:36 AM Kees Cook <[email protected]> wrote:
> >
> > Yup, I think it's a weakness of the ARM implementation and I'd like to
> > not extend it further. AFAIK we should never nest, but I would not be
> > surprised at all if we did.
>
> Wel, at least the user_access_begin/end() sections can't nest. objtool
> verifies and warns about that on x86.
Right, yes, I mentioned that earlier in the thread. I meant I wasn't
100% sure about ARM's corner cases. I would _hope_ it doesn't.
> > If we were looking at a design goal for all architectures, I'd like
> > to be doing what the public PaX patchset
>
> We already do better than PaX ever did. Seriously. Mainline has long
> since passed their hacky garbage.
I was just speaking to design principles in this area: if the "enable"
is called when already enabled, Something Is Wrong. :) (And one thing
still missing in this general subject is that x86 still lacks SMAP
emulation. And yes, I understand it's just not been a priority for anyone
that can work on it, but it is still a gap.)
--
Kees Cook
On Thu, Apr 2, 2020 at 1:27 PM Kees Cook <[email protected]> wrote:
>
> I was just speaking to design principles in this area: if the "enable"
> is called when already enabled, Something Is Wrong. :)
Well, the "something is wrong" could easily be "the hardware does not
support this".
I'm not at all interested in the crazy code to do this in software.
Nobody sane should ever do that.
Yes, I realize that PaX did software emulation of things like that,
and it was one of the reasons why it was never useful to any normal
use.
Security is not an end goal in itself, it's always secondary to "can I
use this".
Security that means "normal people can't use this, it's only for the
special l33t users" is not security, it's garbage. That "do page
tables in software" was a prime example of garbage.
Linus
On Thu, Apr 02, 2020 at 11:35:57AM -0700, Kees Cook wrote:
> Yup, I think it's a weakness of the ARM implementation and I'd like to
> not extend it further. AFAIK we should never nest, but I would not be
> surprised at all if we did.
>
> If we were looking at a design goal for all architectures, I'd like
> to be doing what the public PaX patchset did for their memory access
> switching, which is to alarm if calling into "enable" found the access
> already enabled, etc. Such a condition would show an unexpected nesting
> (like we've seen with similar constructs with set_fs() not getting reset
> during an exception handler, etc etc).
FWIW, maybe I'm misreading the ARM uaccess logics, but... it smells like
KERNEL_DS is somewhat more dangerous there than on e.g. x86.
Look: with CONFIG_CPU_DOMAINS, set_fs(KERNEL_DS) tells MMU to ignore
per-page permission bits in DOMAIN_KERNEL (i.e. for kernel address
ranges), allowing them even if they would normally be denied. We need
that for actual uaccess loads/stores, since those use insns that pretend
to be done in user mode and we want them to access the kernel pages.
But that affects the normal loads/stores as well; unless I'm misreading
that code, it will ignore (supervisor) r/o on a page. And that's not
just for the code inside the uaccess blocks; *everything* done under
KERNEL_DS is subject to that.
Why do we do that (modify_domain(), that is) inside set_fs() and not
in uaccess_enable() et.al.?
On Fri, Apr 03, 2020 at 01:58:31AM +0100, Al Viro wrote:
> On Thu, Apr 02, 2020 at 11:35:57AM -0700, Kees Cook wrote:
>
> > Yup, I think it's a weakness of the ARM implementation and I'd like to
> > not extend it further. AFAIK we should never nest, but I would not be
> > surprised at all if we did.
> >
> > If we were looking at a design goal for all architectures, I'd like
> > to be doing what the public PaX patchset did for their memory access
> > switching, which is to alarm if calling into "enable" found the access
> > already enabled, etc. Such a condition would show an unexpected nesting
> > (like we've seen with similar constructs with set_fs() not getting reset
> > during an exception handler, etc etc).
>
> FWIW, maybe I'm misreading the ARM uaccess logics, but... it smells like
> KERNEL_DS is somewhat more dangerous there than on e.g. x86.
>
> Look: with CONFIG_CPU_DOMAINS, set_fs(KERNEL_DS) tells MMU to ignore
> per-page permission bits in DOMAIN_KERNEL (i.e. for kernel address
> ranges), allowing them even if they would normally be denied. We need
> that for actual uaccess loads/stores, since those use insns that pretend
> to be done in user mode and we want them to access the kernel pages.
> But that affects the normal loads/stores as well; unless I'm misreading
> that code, it will ignore (supervisor) r/o on a page. And that's not
> just for the code inside the uaccess blocks; *everything* done under
> KERNEL_DS is subject to that.
>
> Why do we do that (modify_domain(), that is) inside set_fs() and not
> in uaccess_enable() et.al.?
First, CONFIG_CPU_DOMAINS is used on older ARMs, not ARMv7. Second,
the kernel image itself is not RO-protected on any ARM32 platform.
If we get rid of CONFIG_CPU_DOMAINS, we will use the ARMv7 method of
user access, which is to use normal load/stores for the user accessors
and every access must check against the address limit, even the
__-accessors.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Thu, Apr 02, 2020 at 11:35:57AM -0700, Kees Cook wrote:
> On Thu, Apr 02, 2020 at 06:50:32PM +0100, Al Viro wrote:
> > On Thu, Apr 02, 2020 at 07:03:28PM +0200, Christophe Leroy wrote:
> >
> > > user_access_begin() grants both read and write.
> > >
> > > This patch adds user_read_access_begin() and user_write_access_begin() but
> > > it doesn't remove user_access_begin()
> >
> > Ouch... So the most generic name is for the rarest case?
> >
> > > > What should we do about that? Do we prohibit such blocks outside
> > > > of arch?
> > > >
> > > > What should we do about arm and s390? There we want a cookie passed
> > > > from beginning of block to its end; should that be a return value?
> > >
> > > That was the way I implemented it in January, see
> > > https://patchwork.ozlabs.org/patch/1227926/
> > >
> > > There was some discussion around that and most noticeable was:
> > >
> > > H. Peter (hpa) said about it: "I have *deep* concern with carrying state in
> > > a "key" variable: it's a direct attack vector for a crowbar attack,
> > > especially since it is by definition live inside a user access region."
> >
> > > This patch minimises the change by just adding user_read_access_begin() and
> > > user_write_access_begin() keeping the same parameters as the existing
> > > user_access_begin().
> >
> > Umm... What about the arm situation? The same concerns would apply there,
> > wouldn't they? Currently we have
> > static __always_inline unsigned int uaccess_save_and_enable(void)
> > {
> > #ifdef CONFIG_CPU_SW_DOMAIN_PAN
> > unsigned int old_domain = get_domain();
> >
> > /* Set the current domain access to permit user accesses */
> > set_domain((old_domain & ~domain_mask(DOMAIN_USER)) |
> > domain_val(DOMAIN_USER, DOMAIN_CLIENT));
> >
> > return old_domain;
> > #else
> > return 0;
> > #endif
> > }
> > and
> > static __always_inline void uaccess_restore(unsigned int flags)
> > {
> > #ifdef CONFIG_CPU_SW_DOMAIN_PAN
> > /* Restore the user access mask */
> > set_domain(flags);
> > #endif
> > }
> >
> > How much do we need nesting on those, anyway? rmk?
It's that way because it's easy, logical, and actually *more* efficient
to do it that way, rather than read-modify-write the domain register
each time we want to change it.
> Yup, I think it's a weakness of the ARM implementation and I'd like to
> not extend it further. AFAIK we should never nest, but I would not be
> surprised at all if we did.
There is one known nesting, which is __clear_user() when used with
the (IMHO horrid and I don't care about) UACCESS_WITH_MEMCPY feature.
That's not intentional however.
When I introduced this on ARM, the placement I adopted was to locate
it _as close as sanely possible_ to the userspace access, so that we
minimise the number of kernel accesses that could go stray because of
the domain issue - we ideally only want the access done by the
accessor itself to be affected, which we achieve for most accesses.
Thinking laterally, maybe we should get rid of the whole KERNEL_DS
stuff entirely, and come up with an alternative way of handling the
kernel-wants-to-access-kernelspace-via-user-accessors problem.
Such as, copying some data back to userspace memory?
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Fri, Apr 03, 2020 at 01:58:31AM +0100, Al Viro wrote:
> On Thu, Apr 02, 2020 at 11:35:57AM -0700, Kees Cook wrote:
> > Yup, I think it's a weakness of the ARM implementation and I'd like to
> > not extend it further. AFAIK we should never nest, but I would not be
> > surprised at all if we did.
> >
> > If we were looking at a design goal for all architectures, I'd like
> > to be doing what the public PaX patchset did for their memory access
> > switching, which is to alarm if calling into "enable" found the access
> > already enabled, etc. Such a condition would show an unexpected nesting
> > (like we've seen with similar constructs with set_fs() not getting reset
> > during an exception handler, etc etc).
>
> FWIW, maybe I'm misreading the ARM uaccess logics, but... it smells like
> KERNEL_DS is somewhat more dangerous there than on e.g. x86.
>
> Look: with CONFIG_CPU_DOMAINS, set_fs(KERNEL_DS) tells MMU to ignore
> per-page permission bits in DOMAIN_KERNEL (i.e. for kernel address
> ranges), allowing them even if they would normally be denied. We need
> that for actual uaccess loads/stores, since those use insns that pretend
> to be done in user mode and we want them to access the kernel pages.
> But that affects the normal loads/stores as well; unless I'm misreading
> that code, it will ignore (supervisor) r/o on a page. And that's not
> just for the code inside the uaccess blocks; *everything* done under
> KERNEL_DS is subject to that.
That's correct. Luckily this only affects ARMv5 and earlier. From ARMv6
onwards, CONFIG_CPU_USE_DOMAINS is no longer selected and the uaccess
instructions are just plain ldr/str.
Russell should know the details on whether there was much choice. Since
the kernel was living in the linear map with full rwx permissions, the
KERNEL_DS overriding was probably not a concern and the ldrt/strt for
uaccess deemed more secure. We also have weird permission setting
pre-ARMv6 (or rather v6k) where RO user pages are writable from the
kernel with standard str instructions (breaking CoW). I don't recall
whether it was a choice made by the kernel or something the architecture
enforced. The vectors page has to be kernel writable (and user RO) to
store the TLS value in the absence of a TLS register but maybe we could
do this via the linear alias together with the appropriate cache
maintenance.
From ARMv6, the domain overriding had the side-effect of ignoring the XN
bit and causing random instruction fetches from ioremap() areas. So we
had to remove the domain switching. We also gained a dedicated TLS
register.
> Why do we do that (modify_domain(), that is) inside set_fs() and not
> in uaccess_enable() et.al.?
I think uaccess_enable() could indeed switch the kernel domain if
KERNEL_DS is set and move this out of set_fs(). It would reduce the
window the kernel domain permissions are overridden. Anyway,
uaccess_enable() appeared much later on arm when Russell introduced PAN
(SMAP) like support by switching the user domain.
--
Catalin
On Fri, Apr 03, 2020 at 12:26:10PM +0100, Catalin Marinas wrote:
> On Fri, Apr 03, 2020 at 01:58:31AM +0100, Al Viro wrote:
> > On Thu, Apr 02, 2020 at 11:35:57AM -0700, Kees Cook wrote:
> > > Yup, I think it's a weakness of the ARM implementation and I'd like to
> > > not extend it further. AFAIK we should never nest, but I would not be
> > > surprised at all if we did.
> > >
> > > If we were looking at a design goal for all architectures, I'd like
> > > to be doing what the public PaX patchset did for their memory access
> > > switching, which is to alarm if calling into "enable" found the access
> > > already enabled, etc. Such a condition would show an unexpected nesting
> > > (like we've seen with similar constructs with set_fs() not getting reset
> > > during an exception handler, etc etc).
> >
> > FWIW, maybe I'm misreading the ARM uaccess logics, but... it smells like
> > KERNEL_DS is somewhat more dangerous there than on e.g. x86.
> >
> > Look: with CONFIG_CPU_DOMAINS, set_fs(KERNEL_DS) tells MMU to ignore
> > per-page permission bits in DOMAIN_KERNEL (i.e. for kernel address
> > ranges), allowing them even if they would normally be denied. We need
> > that for actual uaccess loads/stores, since those use insns that pretend
> > to be done in user mode and we want them to access the kernel pages.
> > But that affects the normal loads/stores as well; unless I'm misreading
> > that code, it will ignore (supervisor) r/o on a page. And that's not
> > just for the code inside the uaccess blocks; *everything* done under
> > KERNEL_DS is subject to that.
>
> That's correct. Luckily this only affects ARMv5 and earlier. From ARMv6
> onwards, CONFIG_CPU_USE_DOMAINS is no longer selected and the uaccess
> instructions are just plain ldr/str.
>
> Russell should know the details on whether there was much choice. Since
> the kernel was living in the linear map with full rwx permissions, the
> KERNEL_DS overriding was probably not a concern and the ldrt/strt for
> uaccess deemed more secure. We also have weird permission setting
> pre-ARMv6 (or rather v6k) where RO user pages are writable from the
> kernel with standard str instructions (breaking CoW). I don't recall
> whether it was a choice made by the kernel or something the architecture
> enforced. The vectors page has to be kernel writable (and user RO) to
> store the TLS value in the absence of a TLS register but maybe we could
> do this via the linear alias together with the appropriate cache
> maintenance.
>
> From ARMv6, the domain overriding had the side-effect of ignoring the XN
> bit and causing random instruction fetches from ioremap() areas. So we
> had to remove the domain switching. We also gained a dedicated TLS
> register.
Indeed. On pre-ARMv6, we have the following choices for protection
attributes:
Page tables   Control Reg   Privileged      User
AP            S,R           permission      permission
00            0,0           No access       No access
00            1,0           Read-only       No access
00            0,1           Read-only       Read-only
00            1,1           Unpredictable   Unpredictable
01            X,X           Read/Write      No access
10            X,X           Read/Write      Read-only
11            X,X           Read/Write      Read/Write
We use S,R=1,0 under Linux because this allows us to read-protect
kernel pages without making them visible to userspace. If we
changed to S,R=0,1, then we could have our read-only permissions
for both kernel and userspace, drop domain switching, and use the
plain LDR/STR instructions, but we then lose the ability to
write-protect module executable code and other parts of kernel
space without making them visible to userspace.
So, it essentially boils down to making a choice - which set of
security features we think are the most important.
> I think uaccess_enable() could indeed switch the kernel domain if
> KERNEL_DS is set and move this out of set_fs(). It would reduce the
> window the kernel domain permissions are overridden. Anyway,
> uaccess_enable() appeared much later on arm when Russell introduced PAN
> (SMAP) like support by switching the user domain.
Yes, that would be a possibility. Another possibility would be to
eliminate as much usage of KERNEL_DS as possible - I've just found
one instance in sys_oabi-compat.c that can be eliminated (epoll_ctl)
but there are several that can't with the current code structure,
and re-coding the contents of some fs/* functions to work around that
is a very bad idea. If there's some scope for rejigging some of the
fs/* code, it may be possible to eliminate some other cases in there.
I notice that the fs/* code seems like some of the last remaining
users of KERNEL_DS, although I suspect that some aren't possible to
eliminate. :(
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 10.2Mbps down 587kbps up
On Fri, Apr 03, 2020 at 02:37:19PM +0100, Russell King - ARM Linux admin wrote:
> > I think uaccess_enable() could indeed switch the kernel domain if
> > KERNEL_DS is set and move this out of set_fs(). It would reduce the
> > window the kernel domain permissions are overridden. Anyway,
> > uaccess_enable() appeared much later on arm when Russell introduced PAN
> > (SMAP) like support by switching the user domain.
>
> Yes, that would be a possibility. Another possibility would be to
> eliminate as much usage of KERNEL_DS as possible
That's definitely worth doing, but that's another long-term project ;-/
> - I've just found
> one instance in sys_oabi-compat.c that can be eliminated (epoll_ctl)
> but there's several there that can't with the current code structure,
> and re-coding the contents of some fs/* functions to work around that
> is a very bad idea. If there's some scope for rejigging some of the
> fs/* code, it may be possible to elimate some other cases in there.
Well, your do_locks() definitely can be converted. epoll_wait()...
not sure, need to look into that. Is that about the layout mismatch
between struct oabi_epoll_event and struct epoll_event? In case of
semtimedop... Hell knows, I would probably consider moving that thing
into ipc/sem.c under ifdef...