Some architectures grand full access to userspace regardless of the
address/len passed to user_access_begin(), but other architectures
only grand access to the requested area.
For exemple, on 32 bits powerpc (book3s/32), access is granted by
segments of 256 Mbytes.
Modify filldir() and filldir64() to request the real area they need
to get access to.
Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()")
Signed-off-by: Christophe Leroy <[email protected]>
---
fs/readdir.c | 20 ++++++--------------
1 file changed, 6 insertions(+), 14 deletions(-)
diff --git a/fs/readdir.c b/fs/readdir.c
index d26d5ea4de7b..ef04e5e76c59 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -236,15 +236,11 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
if (dirent && signal_pending(current))
return -EINTR;
- /*
- * Note! This range-checks 'previous' (which may be NULL).
- * The real range was checked in getdents
- */
- if (!user_access_begin(dirent, sizeof(*dirent)))
+ if (dirent && unlikely(put_user(offset, &dirent->d_off)))
goto efault;
- if (dirent)
- unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+ if (!user_access_begin(dirent, reclen))
+ goto efault;
unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
@@ -323,15 +319,11 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
if (dirent && signal_pending(current))
return -EINTR;
- /*
- * Note! This range-checks 'previous' (which may be NULL).
- * The real range was checked in getdents
- */
- if (!user_access_begin(dirent, sizeof(*dirent)))
+ if (dirent && unlikely(put_user(offset, &dirent->d_off)))
goto efault;
- if (dirent)
- unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+ if (!user_access_begin(dirent, reclen))
+ goto efault;
unsafe_put_user(ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, &dirent->d_type, efault_end);
--
2.25.0
In preparation of implementing user_access_begin and friends
on powerpc, the book3s/32 version of prevent_user_access() need
to be prepared for user_access_end().
user_access_end() doesn't provide the address and size which
were passed to user_access_begin(), required by prevent_user_access()
to know which segment to modify.
The list of segments which where unprotected by allow_user_access()
are available in current->kuap. But we don't want prevent_user_access()
to read this all the time, especially everytime it is 0 (for instance
because the access was not a write access).
Implement a special direction case named KUAP_SELF. In this case only,
the addr and end are retrieved from current->kuap.
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/32/kup.h | 25 ++++++++++++++++++------
arch/powerpc/include/asm/kup.h | 1 +
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c1798e56b55..a99fc3428ac9 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -117,6 +117,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
return;
end = min(addr + size, TASK_SIZE);
+
current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
}
@@ -127,15 +128,27 @@ static __always_inline void prevent_user_access(void __user *to, const void __us
u32 addr, end;
BUILD_BUG_ON(!__builtin_constant_p(dir));
- if (!(dir & KUAP_W))
- return;
- addr = (__force u32)to;
+ if (dir == KUAP_SELF) {
+ u32 kuap = current->thread.kuap;
- if (unlikely(addr >= TASK_SIZE || !size))
- return;
+ if (unlikely(!kuap))
+ return;
+
+ addr = kuap & 0xf0000000;
+ end = kuap << 28;
+ } else {
+ if (!(dir & KUAP_W))
+ return;
+
+ addr = (__force u32)to;
+
+ if (unlikely(addr >= TASK_SIZE || !size))
+ return;
+
+ end = min(addr + size, TASK_SIZE);
+ }
- end = min(addr + size, TASK_SIZE);
current->thread.kuap = 0;
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 49fd977a498e..0676df6f3f86 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -5,6 +5,7 @@
#define KUAP_R 1
#define KUAP_W 2
#define KUAP_RW (KUAP_R | KUAP_W)
+#define KUAP_SELF 4
#ifdef CONFIG_PPC64
#include <asm/book3s/64/kup-radix.h>
--
2.25.0
At the moment, bad_kuap_fault() reports a fault only if a bad access
to userspace occurred while access to userspace was not granted.
But if a fault occurs for a write outside the allowed userspace
segment(s) that have been unlocked, bad_kuap_fault() fails to
detect it and the kernel loops forever in do_page_fault().
Fix it by checking that the accessed address is within the allowed
range.
Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection")
Cc: [email protected]
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/32/kup.h | 9 +++++++--
arch/powerpc/include/asm/book3s/64/kup-radix.h | 3 ++-
arch/powerpc/include/asm/nohash/32/kup-8xx.h | 3 ++-
arch/powerpc/mm/fault.c | 2 +-
4 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index f9dc597b0b86..d88008c8eb85 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -131,12 +131,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
+ unsigned long begin = regs->kuap & 0xf0000000;
+ unsigned long end = regs->kuap << 28;
+
if (!is_write)
return false;
- return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+ return WARN(address < begin || address >= end,
+ "Bug: write fault blocked by segment registers !");
}
#endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..dbbd22cb80f5 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -95,7 +95,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
set_kuap(AMR_KUAP_BLOCKED);
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1006a427e99c..f2fea603b929 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -46,7 +46,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
"Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b5047f9b5dec..1baeb045f7f4 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
// Read/write fault in a valid region (the exception table search passed
// above), but blocked by KUAP is bad, it can never succeed.
- if (bad_kuap_fault(regs, is_write))
+ if (bad_kuap_fault(regs, address, is_write))
return true;
// What's left? Kernel fault on user in well defined regions (extable
--
2.25.0
NULL addr is a user address. Don't waste time checking it. If
someone tries to access it, it will SIGFAULT the same way as for
address 1, so no need to make it special.
The special case is when not doing a write, in that case we want
to drop the entire function. This is now handled by 'dir' param
and not by the nulity of 'to' anymore.
Also make beginning of prevent_user_access() similar
to beginning of allow_user_access(), and tell the compiler
that writing in kernel space or with a 0 length is unlikely
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/book3s/32/kup.h | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index d765515bd1c1..3c1798e56b55 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -113,7 +113,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
addr = (__force u32)to;
- if (!addr || addr >= TASK_SIZE || !size)
+ if (unlikely(addr >= TASK_SIZE || !size))
return;
end = min(addr + size, TASK_SIZE);
@@ -124,16 +124,18 @@ static __always_inline void allow_user_access(void __user *to, const void __user
static __always_inline void prevent_user_access(void __user *to, const void __user *from,
u32 size, unsigned long dir)
{
- u32 addr = (__force u32)to;
- u32 end = min(addr + size, TASK_SIZE);
+ u32 addr, end;
BUILD_BUG_ON(!__builtin_constant_p(dir));
if (!(dir & KUAP_W))
return;
- if (!addr || addr >= TASK_SIZE || !size)
+ addr = (__force u32)to;
+
+ if (unlikely(addr >= TASK_SIZE || !size))
return;
+ end = min(addr + size, TASK_SIZE);
current->thread.kuap = 0;
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}
--
2.25.0
On Wed, Jan 22, 2020 at 5:00 AM Christophe Leroy
<[email protected]> wrote:
>
> Modify filldir() and filldir64() to request the real area they need
> to get access to.
Not like this.
This makes the situation for architectures like x86 much worse, since
you now use "put_user()" for the previous dirent filling. Which does
that expensive user access setup/teardown twice again.
So either you need to cover both the dirent's with one call, or you
just need to cover the whole (original) user buffer passed in. But not
this unholy mixing of both unsafe_put_user() and regular put_user().
Linus
On Wed, Jan 22, 2020 at 08:13:12AM -0800, Linus Torvalds wrote:
> On Wed, Jan 22, 2020 at 5:00 AM Christophe Leroy
> <[email protected]> wrote:
> >
> > Modify filldir() and filldir64() to request the real area they need
> > to get access to.
>
> Not like this.
>
> This makes the situation for architectures like x86 much worse, since
> you now use "put_user()" for the previous dirent filling. Which does
> that expensive user access setup/teardown twice again.
>
> So either you need to cover both the dirent's with one call, or you
> just need to cover the whole (original) user buffer passed in. But not
> this unholy mixing of both unsafe_put_user() and regular put_user().
I would suggest simply covering the range from dirent->d_off to
buf->current_dir->d_name[namelen]; they are going to be close to
each other and we need those addresses anyway...
Le 22/01/2020 à 18:41, Al Viro a écrit :
> On Wed, Jan 22, 2020 at 08:13:12AM -0800, Linus Torvalds wrote:
>> On Wed, Jan 22, 2020 at 5:00 AM Christophe Leroy
>> <[email protected]> wrote:
>>>
>>> Modify filldir() and filldir64() to request the real area they need
>>> to get access to.
>>
>> Not like this.
>>
>> This makes the situation for architectures like x86 much worse, since
>> you now use "put_user()" for the previous dirent filling. Which does
>> that expensive user access setup/teardown twice again.
>>
>> So either you need to cover both the dirent's with one call, or you
>> just need to cover the whole (original) user buffer passed in. But not
>> this unholy mixing of both unsafe_put_user() and regular put_user().
>
> I would suggest simply covering the range from dirent->d_off to
> buf->current_dir->d_name[namelen]; they are going to be close to
> each other and we need those addresses anyway...
>
In v2, I'm covering from the beginning of parent dirent to the end of
current dirent.
Christophe