2019-01-17 06:24:25

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v3 1/2] mm: add probe_user_read()

In powerpc code, there are several places implementing safe
access to user data. This is sometimes implemented using
probe_kernel_address() with additional access_ok() verification,
sometimes with get_user() enclosed in a pagefault_disable()/enable()
pair, etc. :
show_user_instructions()
bad_stack_expansion()
p9_hmi_special_emu()
fsl_pci_mcheck_exception()
read_user_stack_64()
read_user_stack_32() on PPC64
read_user_stack_32() on PPC32
power_pmu_bhrb_to()

In the same spirit as probe_kernel_read(), this patch adds
probe_user_read().

probe_user_read() does the same as probe_kernel_read() but
first checks that it is really a user address.

The patch defines this function as a static inline so the "size"
variable can be examined for const-ness by the check_object_size()
in __copy_from_user_inatomic()

Signed-off-by: Christophe Leroy <[email protected]>
---
v3: Moved 'Returns:" comment after description.
Explained in the commit log why the function is defined static inline

v2: Added "Returns:" comment and removed probe_user_address()

include/linux/uaccess.h | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 37b226e8df13..ef99edd63da3 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -263,6 +263,40 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
#define probe_kernel_address(addr, retval) \
probe_kernel_read(&retval, addr, sizeof(retval))

+/**
+ * probe_user_read(): safely attempt to read from a user location
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from
+ * @size: size of the data chunk
+ *
+ * Safely read from address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ *
+ * We ensure that the copy_from_user is executed in atomic context so that
+ * do_page_fault() doesn't attempt to take mmap_sem. This makes
+ * probe_user_read() suitable for use within regions where the caller
+ * already holds mmap_sem, or other locks which nest inside mmap_sem.
+ *
+ * Returns: 0 on success, -EFAULT on error.
+ */
+
+#ifndef probe_user_read
+static __always_inline long probe_user_read(void *dst, const void __user *src,
+ size_t size)
+{
+ long ret;
+
+ if (!access_ok(src, size))
+ return -EFAULT;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, size);
+ pagefault_enable();
+
+ return ret ? -EFAULT : 0;
+}
+#endif
+
#ifndef user_access_begin
#define user_access_begin(ptr,len) access_ok(ptr, len)
#define user_access_end() do { } while (0)
--
2.13.3



2019-01-17 06:25:33

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v3 2/2] powerpc: use probe_user_read()

Instead of opencoding, use probe_user_read() to failessly
read a user location.

Signed-off-by: Christophe Leroy <[email protected]>
---
v3: No change

v2: Using probe_user_read() instead of probe_user_address()

arch/powerpc/kernel/process.c | 12 +-----------
arch/powerpc/mm/fault.c | 6 +-----
arch/powerpc/perf/callchain.c | 20 +++-----------------
arch/powerpc/perf/core-book3s.c | 8 +-------
arch/powerpc/sysdev/fsl_pci.c | 10 ++++------
5 files changed, 10 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ce393df243aa..6a4b59d574c2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1298,16 +1298,6 @@ void show_user_instructions(struct pt_regs *regs)

pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));

- /*
- * Make sure the NIP points at userspace, not kernel text/data or
- * elsewhere.
- */
- if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
- pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
- current->comm, current->pid);
- return;
- }
-
seq_buf_init(&s, buf, sizeof(buf));

while (n) {
@@ -1318,7 +1308,7 @@ void show_user_instructions(struct pt_regs *regs)
for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
int instr;

- if (probe_kernel_address((const void *)pc, instr)) {
+ if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) {
seq_buf_printf(&s, "XXXXXXXX ");
continue;
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 887f11bcf330..ec74305fa330 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -276,12 +276,8 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
access_ok(nip, sizeof(*nip))) {
unsigned int inst;
- int res;

- pagefault_disable();
- res = __get_user_inatomic(inst, nip);
- pagefault_enable();
- if (!res)
+ if (!probe_user_read(&inst, nip, sizeof(inst)))
return !store_updates_sp(inst);
*must_retry = true;
}
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 0af051a1974e..0680efb2237b 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -159,12 +159,8 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
((unsigned long)ptr & 7))
return -EFAULT;

- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
+ if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
- }
- pagefault_enable();

return read_user_stack_slow(ptr, ret, 8);
}
@@ -175,12 +171,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
((unsigned long)ptr & 3))
return -EFAULT;

- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
+ if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
- }
- pagefault_enable();

return read_user_stack_slow(ptr, ret, 4);
}
@@ -307,17 +299,11 @@ static inline int current_is_64bit(void)
*/
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
- int rc;
-
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
((unsigned long)ptr & 3))
return -EFAULT;

- pagefault_disable();
- rc = __get_user_inatomic(*ret, ptr);
- pagefault_enable();
-
- return rc;
+ return probe_user_read(ret, ptr, sizeof(*ret));
}

static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0723002a396..4b64ddf0db68 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -416,7 +416,6 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static __u64 power_pmu_bhrb_to(u64 addr)
{
unsigned int instr;
- int ret;
__u64 target;

if (is_kernel_addr(addr)) {
@@ -427,13 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}

/* Userspace: need copy instruction here then translate it */
- pagefault_disable();
- ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
- if (ret) {
- pagefault_enable();
+ if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr)))
return 0;
- }
- pagefault_enable();

target = branch_target(&instr);
if ((!target) || (instr & BRANCH_ABSOLUTE))
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 918be816b097..c8a1b26489f5 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1068,13 +1068,11 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
addr += mfspr(SPRN_MCAR);

if (is_in_pci_mem_space(addr)) {
- if (user_mode(regs)) {
- pagefault_disable();
- ret = get_user(inst, (__u32 __user *)regs->nip);
- pagefault_enable();
- } else {
+ if (user_mode(regs))
+ ret = probe_user_read(&inst, (void __user *)regs->nip,
+ sizeof(inst));
+ else
ret = probe_kernel_address((void *)regs->nip, inst);
- }

if (!ret && mcheck_handle_load(regs, inst)) {
regs->nip += 4;
--
2.13.3


2019-01-31 04:21:00

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] powerpc: use probe_user_read()

Christophe Leroy <[email protected]> writes:

> Instead of opencoding, use probe_user_read() to failessly
> read a user location.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v3: No change
>
> v2: Using probe_user_read() instead of probe_user_address()
>
> arch/powerpc/kernel/process.c | 12 +-----------
> arch/powerpc/mm/fault.c | 6 +-----
> arch/powerpc/perf/callchain.c | 20 +++-----------------
> arch/powerpc/perf/core-book3s.c | 8 +-------
> arch/powerpc/sysdev/fsl_pci.c | 10 ++++------
> 5 files changed, 10 insertions(+), 46 deletions(-)

Looks good.

Acked-by: Michael Ellerman <[email protected]>

cheers

> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index ce393df243aa..6a4b59d574c2 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1298,16 +1298,6 @@ void show_user_instructions(struct pt_regs *regs)
>
> pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
>
> - /*
> - * Make sure the NIP points at userspace, not kernel text/data or
> - * elsewhere.
> - */
> - if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
> - pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
> - current->comm, current->pid);
> - return;
> - }
> -
> seq_buf_init(&s, buf, sizeof(buf));
>
> while (n) {
> @@ -1318,7 +1308,7 @@ void show_user_instructions(struct pt_regs *regs)
> for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
> int instr;
>
> - if (probe_kernel_address((const void *)pc, instr)) {
> + if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) {
> seq_buf_printf(&s, "XXXXXXXX ");
> continue;
> }
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 887f11bcf330..ec74305fa330 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -276,12 +276,8 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
> if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
> access_ok(nip, sizeof(*nip))) {
> unsigned int inst;
> - int res;
>
> - pagefault_disable();
> - res = __get_user_inatomic(inst, nip);
> - pagefault_enable();
> - if (!res)
> + if (!probe_user_read(&inst, nip, sizeof(inst)))
> return !store_updates_sp(inst);
> *must_retry = true;
> }
> diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
> index 0af051a1974e..0680efb2237b 100644
> --- a/arch/powerpc/perf/callchain.c
> +++ b/arch/powerpc/perf/callchain.c
> @@ -159,12 +159,8 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
> ((unsigned long)ptr & 7))
> return -EFAULT;
>
> - pagefault_disable();
> - if (!__get_user_inatomic(*ret, ptr)) {
> - pagefault_enable();
> + if (!probe_user_read(ret, ptr, sizeof(*ret)))
> return 0;
> - }
> - pagefault_enable();
>
> return read_user_stack_slow(ptr, ret, 8);
> }
> @@ -175,12 +171,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
> ((unsigned long)ptr & 3))
> return -EFAULT;
>
> - pagefault_disable();
> - if (!__get_user_inatomic(*ret, ptr)) {
> - pagefault_enable();
> + if (!probe_user_read(ret, ptr, sizeof(*ret)))
> return 0;
> - }
> - pagefault_enable();
>
> return read_user_stack_slow(ptr, ret, 4);
> }
> @@ -307,17 +299,11 @@ static inline int current_is_64bit(void)
> */
> static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
> {
> - int rc;
> -
> if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
> ((unsigned long)ptr & 3))
> return -EFAULT;
>
> - pagefault_disable();
> - rc = __get_user_inatomic(*ret, ptr);
> - pagefault_enable();
> -
> - return rc;
> + return probe_user_read(ret, ptr, sizeof(*ret));
> }
>
> static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index b0723002a396..4b64ddf0db68 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -416,7 +416,6 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
> static __u64 power_pmu_bhrb_to(u64 addr)
> {
> unsigned int instr;
> - int ret;
> __u64 target;
>
> if (is_kernel_addr(addr)) {
> @@ -427,13 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr)
> }
>
> /* Userspace: need copy instruction here then translate it */
> - pagefault_disable();
> - ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
> - if (ret) {
> - pagefault_enable();
> + if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr)))
> return 0;
> - }
> - pagefault_enable();
>
> target = branch_target(&instr);
> if ((!target) || (instr & BRANCH_ABSOLUTE))
> diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
> index 918be816b097..c8a1b26489f5 100644
> --- a/arch/powerpc/sysdev/fsl_pci.c
> +++ b/arch/powerpc/sysdev/fsl_pci.c
> @@ -1068,13 +1068,11 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
> addr += mfspr(SPRN_MCAR);
>
> if (is_in_pci_mem_space(addr)) {
> - if (user_mode(regs)) {
> - pagefault_disable();
> - ret = get_user(inst, (__u32 __user *)regs->nip);
> - pagefault_enable();
> - } else {
> + if (user_mode(regs))
> + ret = probe_user_read(&inst, (void __user *)regs->nip,
> + sizeof(inst));
> + else
> ret = probe_kernel_address((void *)regs->nip, inst);
> - }
>
> if (!ret && mcheck_handle_load(regs, inst)) {
> regs->nip += 4;
> --
> 2.13.3

2019-01-31 04:26:29

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] mm: add probe_user_read()

Christophe Leroy <[email protected]> writes:

> In powerpc code, there are several places implementing safe
> access to user data. This is sometimes implemented using
> probe_kernel_address() with additional access_ok() verification,
> sometimes with get_user() enclosed in a pagefault_disable()/enable()
> pair, etc. :
> show_user_instructions()
> bad_stack_expansion()
> p9_hmi_special_emu()
> fsl_pci_mcheck_exception()
> read_user_stack_64()
> read_user_stack_32() on PPC64
> read_user_stack_32() on PPC32
> power_pmu_bhrb_to()
>
> In the same spirit as probe_kernel_read(), this patch adds
> probe_user_read().
>
> probe_user_read() does the same as probe_kernel_read() but
> first checks that it is really a user address.
>
> The patch defines this function as a static inline so the "size"
> variable can be examined for const-ness by the check_object_size()
> in __copy_from_user_inatomic()
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v3: Moved 'Returns:" comment after description.
> Explained in the commit log why the function is defined static inline
>
> v2: Added "Returns:" comment and removed probe_user_address()
>
> include/linux/uaccess.h | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 37b226e8df13..ef99edd63da3 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -263,6 +263,40 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
> #define probe_kernel_address(addr, retval) \
> probe_kernel_read(&retval, addr, sizeof(retval))
>
> +/**
> + * probe_user_read(): safely attempt to read from a user location
> + * @dst: pointer to the buffer that shall take the data
> + * @src: address to read from
> + * @size: size of the data chunk
> + *
> + * Safely read from address @src to the buffer at @dst. If a kernel fault
> + * happens, handle that and return -EFAULT.
> + *
> + * We ensure that the copy_from_user is executed in atomic context so that
> + * do_page_fault() doesn't attempt to take mmap_sem. This makes
> + * probe_user_read() suitable for use within regions where the caller
> + * already holds mmap_sem, or other locks which nest inside mmap_sem.
> + *
> + * Returns: 0 on success, -EFAULT on error.
> + */
> +
> +#ifndef probe_user_read
> +static __always_inline long probe_user_read(void *dst, const void __user *src,
> + size_t size)
> +{
> + long ret;
> +

I wonder if we should explicitly switch to USER_DS here?

That would be sort of unusual, but the whole reason for this helper
existing is to make sure we safely read from user memory and not
accidentally from kernel.

cheers

> + if (!access_ok(src, size))
> + return -EFAULT;
> +
> + pagefault_disable();
> + ret = __copy_from_user_inatomic(dst, src, size);
> + pagefault_enable();
> +
> + return ret ? -EFAULT : 0;
> +}
> +#endif
> +
> #ifndef user_access_begin
> #define user_access_begin(ptr,len) access_ok(ptr, len)
> #define user_access_end() do { } while (0)
> --
> 2.13.3

2019-02-05 17:58:14

by Murilo Opsfelder Araujo

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] mm: add probe_user_read()

Hi, Christophe.

On Wed, Jan 16, 2019 at 04:59:27PM +0000, Christophe Leroy wrote:
> In powerpc code, there are several places implementing safe
> access to user data. This is sometimes implemented using
> probe_kernel_address() with additional access_ok() verification,
> sometimes with get_user() enclosed in a pagefault_disable()/enable()
> pair, etc. :
> show_user_instructions()
> bad_stack_expansion()
> p9_hmi_special_emu()
> fsl_pci_mcheck_exception()
> read_user_stack_64()
> read_user_stack_32() on PPC64
> read_user_stack_32() on PPC32
> power_pmu_bhrb_to()
>
> In the same spirit as probe_kernel_read(), this patch adds
> probe_user_read().
>
> probe_user_read() does the same as probe_kernel_read() but
> first checks that it is really a user address.
>
> The patch defines this function as a static inline so the "size"
> variable can be examined for const-ness by the check_object_size()
> in __copy_from_user_inatomic()
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v3: Moved 'Returns:" comment after description.
> Explained in the commit log why the function is defined static inline
>
> v2: Added "Returns:" comment and removed probe_user_address()
>
> include/linux/uaccess.h | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 37b226e8df13..ef99edd63da3 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -263,6 +263,40 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
> #define probe_kernel_address(addr, retval) \
> probe_kernel_read(&retval, addr, sizeof(retval))
>
> +/**
> + * probe_user_read(): safely attempt to read from a user location
> + * @dst: pointer to the buffer that shall take the data
> + * @src: address to read from
> + * @size: size of the data chunk
> + *
> + * Safely read from address @src to the buffer at @dst. If a kernel fault
> + * happens, handle that and return -EFAULT.
> + *
> + * We ensure that the copy_from_user is executed in atomic context so that
> + * do_page_fault() doesn't attempt to take mmap_sem. This makes
> + * probe_user_read() suitable for use within regions where the caller
> + * already holds mmap_sem, or other locks which nest inside mmap_sem.
> + *
> + * Returns: 0 on success, -EFAULT on error.
> + */
> +
> +#ifndef probe_user_read
> +static __always_inline long probe_user_read(void *dst, const void __user *src,
> + size_t size)
> +{
> + long ret;
> +
> + if (!access_ok(src, size))
> + return -EFAULT;

Hopefully, there is still time for a minor comment.

Do we need to differentiate the returned error here, e.g.: return
-EACCES?

I wonder if there will be situations where callers need to know why
probe_user_read() failed.

Besides that:

Acked-by: Murilo Opsfelder Araujo <[email protected]>

> +
> + pagefault_disable();
> + ret = __copy_from_user_inatomic(dst, src, size);
> + pagefault_enable();
> +
> + return ret ? -EFAULT : 0;
> +}
> +#endif
> +
> #ifndef user_access_begin
> #define user_access_begin(ptr,len) access_ok(ptr, len)
> #define user_access_end() do { } while (0)
> --
> 2.13.3
>

--
Murilo


2019-02-07 05:05:08

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] mm: add probe_user_read()

Murilo Opsfelder Araujo <[email protected]> writes:
>> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
>> index 37b226e8df13..ef99edd63da3 100644
>> --- a/include/linux/uaccess.h
>> +++ b/include/linux/uaccess.h
>> @@ -263,6 +263,40 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
>> #define probe_kernel_address(addr, retval) \
>> probe_kernel_read(&retval, addr, sizeof(retval))
>>
>> +/**
>> + * probe_user_read(): safely attempt to read from a user location
>> + * @dst: pointer to the buffer that shall take the data
>> + * @src: address to read from
>> + * @size: size of the data chunk
>> + *
>> + * Safely read from address @src to the buffer at @dst. If a kernel fault
>> + * happens, handle that and return -EFAULT.
>> + *
>> + * We ensure that the copy_from_user is executed in atomic context so that
>> + * do_page_fault() doesn't attempt to take mmap_sem. This makes
>> + * probe_user_read() suitable for use within regions where the caller
>> + * already holds mmap_sem, or other locks which nest inside mmap_sem.
>> + *
>> + * Returns: 0 on success, -EFAULT on error.
>> + */
>> +
>> +#ifndef probe_user_read
>> +static __always_inline long probe_user_read(void *dst, const void __user *src,
>> + size_t size)
>> +{
>> + long ret;
>> +
>> + if (!access_ok(src, size))
>> + return -EFAULT;
>
> Hopefully, there is still time for a minor comment.
>
> Do we need to differentiate the returned error here, e.g.: return
> -EACCES?
>
> I wonder if there will be situations where callers need to know why
> probe_user_read() failed.

It's pretty standard to return EFAULT when an access_ok() check fails,
so I think using EFAULT here is the safest option.

If we used EACCES we'd need to be more careful about converting code to
use this helper, as doing so would potentially cause the error value to
change, which in some cases is not OK.

cheers

2019-02-07 10:28:57

by Jann Horn

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] mm: add probe_user_read()

On Thu, Feb 7, 2019 at 10:22 AM Christophe Leroy
<[email protected]> wrote:
> In powerpc code, there are several places implementing safe
> access to user data. This is sometimes implemented using
> probe_kernel_address() with additional access_ok() verification,
> sometimes with get_user() enclosed in a pagefault_disable()/enable()
> pair, etc. :
> show_user_instructions()
> bad_stack_expansion()
> p9_hmi_special_emu()
> fsl_pci_mcheck_exception()
> read_user_stack_64()
> read_user_stack_32() on PPC64
> read_user_stack_32() on PPC32
> power_pmu_bhrb_to()
>
> In the same spirit as probe_kernel_read(), this patch adds
> probe_user_read().
>
> probe_user_read() does the same as probe_kernel_read() but
> first checks that it is really a user address.
>
> The patch defines this function as a static inline so the "size"
> variable can be examined for const-ness by the check_object_size()
> in __copy_from_user_inatomic()
>
> Signed-off-by: Christophe Leroy <[email protected]>



> ---
> v3: Moved 'Returns:" comment after description.
> Explained in the commit log why the function is defined static inline
>
> v2: Added "Returns:" comment and removed probe_user_address()
>
> include/linux/uaccess.h | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 37b226e8df13..ef99edd63da3 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -263,6 +263,40 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
> #define probe_kernel_address(addr, retval) \
> probe_kernel_read(&retval, addr, sizeof(retval))
>
> +/**
> + * probe_user_read(): safely attempt to read from a user location
> + * @dst: pointer to the buffer that shall take the data
> + * @src: address to read from
> + * @size: size of the data chunk
> + *
> + * Safely read from address @src to the buffer at @dst. If a kernel fault
> + * happens, handle that and return -EFAULT.
> + *
> + * We ensure that the copy_from_user is executed in atomic context so that
> + * do_page_fault() doesn't attempt to take mmap_sem. This makes
> + * probe_user_read() suitable for use within regions where the caller
> + * already holds mmap_sem, or other locks which nest inside mmap_sem.
> + *
> + * Returns: 0 on success, -EFAULT on error.
> + */
> +
> +#ifndef probe_user_read
> +static __always_inline long probe_user_read(void *dst, const void __user *src,
> + size_t size)
> +{
> + long ret;
> +
> + if (!access_ok(src, size))
> + return -EFAULT;

If this happens in code that's running with KERNEL_DS, the access_ok()
is a no-op. If this helper is only intended for accessing real
userspace memory, it would be more robust to add
set_fs(USER_DS)/set_fs(oldfs) around this thing. Looking at the
functions you're referring to in the commit message, e.g.
show_user_instructions() does an explicit `__access_ok(pc,
NR_INSN_TO_PRINT * sizeof(int), USER_DS)` to get the same effect.
(However, __access_ok() looks like it's horribly broken on x86 from
what I can tell, because it's going to use the generic version that
always returns 1...)

> + pagefault_disable();
> + ret = __copy_from_user_inatomic(dst, src, size);
> + pagefault_enable();
> +
> + return ret ? -EFAULT : 0;
> +}
> +#endif
> +
> #ifndef user_access_begin
> #define user_access_begin(ptr,len) access_ok(ptr, len)
> #define user_access_end() do { } while (0)
> --
> 2.13.3
>
>

2019-02-07 13:56:45

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] mm: add probe_user_read()

On Wed, Jan 16, 2019 at 04:59:27PM +0000, Christophe Leroy wrote:
> v3: Moved 'Returns:" comment after description.
> Explained in the commit log why the function is defined static inline
>
> v2: Added "Returns:" comment and removed probe_user_address()

The correct spelling is 'Return:', not 'Returns:':

Return values
~~~~~~~~~~~~

The return value, if any, should be described in a dedicated section
named ``Return``.


2019-02-08 03:02:26

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] mm: add probe_user_read()

Jann Horn <[email protected]> writes:
> On Thu, Feb 7, 2019 at 10:22 AM Christophe Leroy
> <[email protected]> wrote:
>> In powerpc code, there are several places implementing safe
>> access to user data. This is sometimes implemented using
>> probe_kernel_address() with additional access_ok() verification,
>> sometimes with get_user() enclosed in a pagefault_disable()/enable()
>> pair, etc. :
>> show_user_instructions()
>> bad_stack_expansion()
>> p9_hmi_special_emu()
>> fsl_pci_mcheck_exception()
>> read_user_stack_64()
>> read_user_stack_32() on PPC64
>> read_user_stack_32() on PPC32
>> power_pmu_bhrb_to()
>>
>> In the same spirit as probe_kernel_read(), this patch adds
>> probe_user_read().
>>
>> probe_user_read() does the same as probe_kernel_read() but
>> first checks that it is really a user address.
>>
>> The patch defines this function as a static inline so the "size"
>> variable can be examined for const-ness by the check_object_size()
>> in __copy_from_user_inatomic()
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>
>
>
>> ---
>> v3: Moved 'Returns:" comment after description.
>> Explained in the commit log why the function is defined static inline
>>
>> v2: Added "Returns:" comment and removed probe_user_address()
>>
>> include/linux/uaccess.h | 34 ++++++++++++++++++++++++++++++++++
>> 1 file changed, 34 insertions(+)
>>
>> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
>> index 37b226e8df13..ef99edd63da3 100644
>> --- a/include/linux/uaccess.h
>> +++ b/include/linux/uaccess.h
>> @@ -263,6 +263,40 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
>> #define probe_kernel_address(addr, retval) \
>> probe_kernel_read(&retval, addr, sizeof(retval))
>>
>> +/**
>> + * probe_user_read(): safely attempt to read from a user location
>> + * @dst: pointer to the buffer that shall take the data
>> + * @src: address to read from
>> + * @size: size of the data chunk
>> + *
>> + * Safely read from address @src to the buffer at @dst. If a kernel fault
>> + * happens, handle that and return -EFAULT.
>> + *
>> + * We ensure that the copy_from_user is executed in atomic context so that
>> + * do_page_fault() doesn't attempt to take mmap_sem. This makes
>> + * probe_user_read() suitable for use within regions where the caller
>> + * already holds mmap_sem, or other locks which nest inside mmap_sem.
>> + *
>> + * Returns: 0 on success, -EFAULT on error.
>> + */
>> +
>> +#ifndef probe_user_read
>> +static __always_inline long probe_user_read(void *dst, const void __user *src,
>> + size_t size)
>> +{
>> + long ret;
>> +
>> + if (!access_ok(src, size))
>> + return -EFAULT;
>
> If this happens in code that's running with KERNEL_DS, the access_ok()
> is a no-op. If this helper is only intended for accessing real
> userspace memory, it would be more robust to add
> set_fs(USER_DS)/set_fs(oldfs) around this thing. Looking at the
> functions you're referring to in the commit message, e.g.
> show_user_instructions() does an explicit `__access_ok(pc,
> NR_INSN_TO_PRINT * sizeof(int), USER_DS)` to get the same effect.

Yeah I raised the same question up thread.

I think we're both right :) - it should explicitly set USER_DS.

There's precedent for that in the code you mentioned and also in the
perf code, eg:

88b0193d9418 ("perf/callchain: Force USER_DS when invoking perf_callchain_user()")


cheers