Provide a hook that custom memcpy implementations can use to tell KMSAN
that the metadata needs to be copied as well. Without it, false positive
reports are possible in cases where KMSAN fails to intercept memory
initialization.
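As an illustration (hypothetical names, not part of this patch), a caller
with its own non-instrumented copy routine would use the hook like this:

  static void my_copy(void *dst, const void *src, size_t len)
  {
          my_asm_memcpy(dst, src, len);   /* data copy, invisible to KMSAN */
          kmsan_memmove(dst, src, len);   /* copy shadow/origin metadata too */
  }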
Link: https://lore.kernel.org/all/[email protected]/
Suggested-by: Tetsuo Handa <[email protected]>
Signed-off-by: Alexander Potapenko <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Linus Torvalds <[email protected]>
---
include/linux/kmsan-checks.h | 15 +++++++++++++++
mm/kmsan/hooks.c | 11 +++++++++++
2 files changed, 26 insertions(+)
diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h
index c4cae333deec5..e1082dc40abc2 100644
--- a/include/linux/kmsan-checks.h
+++ b/include/linux/kmsan-checks.h
@@ -61,6 +61,17 @@ void kmsan_check_memory(const void *address, size_t size);
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
size_t left);
+/**
+ * kmsan_memmove() - Notify KMSAN about a data copy within kernel.
+ * @to: destination address in the kernel.
+ * @from: source address in the kernel.
+ * @size: number of bytes to copy.
+ *
+ * Invoked after a non-instrumented version of memmove()/memcpy() (e.g. one
+ * implemented in assembly code) is called, in order to copy KMSAN's metadata.
+ */
+void kmsan_memmove(void *to, const void *from, size_t size);
+
#else
static inline void kmsan_poison_memory(const void *address, size_t size,
@@ -78,6 +89,10 @@ static inline void kmsan_copy_to_user(void __user *to, const void *from,
{
}
+static inline void kmsan_memmove(void *to, const void *from, size_t size)
+{
+}
+
#endif
#endif /* _LINUX_KMSAN_CHECKS_H */
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 5d6e2dee5692a..364f778ee226d 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -285,6 +285,17 @@ void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
}
EXPORT_SYMBOL(kmsan_copy_to_user);
+void kmsan_memmove(void *to, const void *from, size_t size)
+{
+ if (!kmsan_enabled || kmsan_in_runtime())
+ return;
+
+ kmsan_enter_runtime();
+ kmsan_internal_memmove_metadata(to, (void *)from, size);
+ kmsan_leave_runtime();
+}
+EXPORT_SYMBOL(kmsan_memmove);
+
/* Helper function to check an URB. */
void kmsan_handle_urb(const struct urb *urb, bool is_out)
{
--
2.44.0.291.gc1ea87d7ee-goog
Bug detection tools based on compiler instrumentation may miss memory
accesses in custom memcpy implementations (such as copy_mc_to_kernel).
Provide instrumentation hooks that tell KASAN, KCSAN, and KMSAN about
such accesses.
Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Alexander Potapenko <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Tetsuo Handa <[email protected]>
Cc: Linus Torvalds <[email protected]>
---
include/linux/instrumented.h | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
index 1b608e00290aa..f5f81f02506eb 100644
--- a/include/linux/instrumented.h
+++ b/include/linux/instrumented.h
@@ -147,6 +147,41 @@ instrument_copy_from_user_after(const void *to, const void __user *from,
kmsan_unpoison_memory(to, n - left);
}
+/**
+ * instrument_memcpy_before - add instrumentation before non-instrumented memcpy
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ *
+ * Instrument memory accesses that happen in custom memcpy implementations. The
+ * instrumentation should be inserted before the memcpy call.
+ */
+static __always_inline void instrument_memcpy_before(void *to, const void *from,
+ unsigned long n)
+{
+ kasan_check_write(to, n);
+ kasan_check_read(from, n);
+ kcsan_check_write(to, n);
+ kcsan_check_read(from, n);
+}
+
+/**
+ * instrument_memcpy_after - add instrumentation before non-instrumented memcpy
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ * @left: number of bytes not copied (if known)
+ *
+ * Instrument memory accesses that happen in custom memcpy implementations. The
+ * instrumentation should be inserted after the memcpy call.
+ */
+static __always_inline void instrument_memcpy_after(void *to, const void *from,
+ unsigned long n,
+ unsigned long left)
+{
+ kmsan_memmove(to, from, n - left);
+}
+
/**
* instrument_get_user() - add instrumentation to get_user()-like macros
* @to: destination variable, may not be address-taken
--
2.44.0.291.gc1ea87d7ee-goog
Memory accesses in copy_mc_to_kernel() and copy_mc_to_user() are performed
by assembly routines and are invisible to KASAN, KCSAN, and KMSAN.
Add hooks from instrumented.h to tell the tools these functions have
memcpy/copy_to_user semantics.
The call to copy_mc_fragile() in copy_mc_fragile_handle_tail() is left
intact, because the latter is only called from the assembly implementation
of copy_mc_fragile(), so the memory accesses in it are covered by the
instrumentation in copy_mc_to_kernel() and copy_mc_to_user().
Link: https://lore.kernel.org/all/[email protected]/
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Alexander Potapenko <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Tetsuo Handa <[email protected]>
---
arch/x86/lib/copy_mc.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index 6e8b7e600def5..e8aec0dbe6bcf 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -4,6 +4,7 @@
#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <linux/instrumented.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -61,10 +62,20 @@ unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned
*/
unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
- if (copy_mc_fragile_enabled)
- return copy_mc_fragile(dst, src, len);
- if (static_cpu_has(X86_FEATURE_ERMS))
- return copy_mc_enhanced_fast_string(dst, src, len);
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_fragile(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
memcpy(dst, src, len);
return 0;
}
@@ -76,6 +87,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
if (copy_mc_fragile_enabled) {
__uaccess_begin();
+ instrument_copy_to_user(dst, src, len);
ret = copy_mc_fragile((__force void *)dst, src, len);
__uaccess_end();
return ret;
@@ -83,6 +95,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
if (static_cpu_has(X86_FEATURE_ERMS)) {
__uaccess_begin();
+ instrument_copy_to_user(dst, src, len);
ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
__uaccess_end();
return ret;
--
2.44.0.291.gc1ea87d7ee-goog
On Tue, 19 Mar 2024 at 09:37, Alexander Potapenko <[email protected]> wrote:
>
> +/**
> + * instrument_memcpy_after - add instrumentation before non-instrumented memcpy
Spot the cut-and-paste.
Linus
On Tue, 19 Mar 2024 at 09:37, Alexander Potapenko <[email protected]> wrote:
>
> if (copy_mc_fragile_enabled) {
> __uaccess_begin();
> + instrument_copy_to_user(dst, src, len);
> ret = copy_mc_fragile((__force void *)dst, src, len);
> __uaccess_end();
I'd actually prefer that instrument_copy_to_user() to be *outside* the
__uaccess_begin.
In fact, I'm a bit surprised that objtool didn't complain about it in that form.
__uaccess_begin() causes the CPU to accept kernel accesses to user
mode, and I don't think instrument_copy_to_user() has any business
actually touching user mode memory.
In fact it might be better to rename the function and change the prototype to
instrument_src(src, len);
because you really can't sanely instrument the destination of a user
copy, but "instrument_src()" might be useful in other situations than
just user copies.
Hmm?
Linus
On 2024/03/20 1:36, Alexander Potapenko wrote:
> @@ -61,10 +62,20 @@ unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned
> */
> unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
> {
> - if (copy_mc_fragile_enabled)
> - return copy_mc_fragile(dst, src, len);
> - if (static_cpu_has(X86_FEATURE_ERMS))
> - return copy_mc_enhanced_fast_string(dst, src, len);
> + unsigned long ret;
> +
> + if (copy_mc_fragile_enabled) {
> + instrument_memcpy_before(dst, src, len);
I feel that instrument_memcpy_before() needs to be called *after*
copy_mc_fragile() etc., because we can't predict how many bytes
copy_mc_fragile() etc. will actually copy.
> + ret = copy_mc_fragile(dst, src, len);
> + instrument_memcpy_after(dst, src, len, ret);
> + return ret;
> + }
On Tue, Mar 19, 2024 at 6:52 PM Linus Torvalds
<[email protected]> wrote:
>
> On Tue, 19 Mar 2024 at 09:37, Alexander Potapenko <[email protected]> wrote:
> >
> > +/**
> > + * instrument_memcpy_after - add instrumentation before non-instrumented memcpy
>
> Spot the cut-and-paste.
>
> Linus
Nice catch, will fix.
On Wed, Mar 20, 2024 at 4:54 AM Tetsuo Handa
<[email protected]> wrote:
>
> On 2024/03/20 1:36, Alexander Potapenko wrote:
> > @@ -61,10 +62,20 @@ unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned
> > */
> > unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
> > {
> > - if (copy_mc_fragile_enabled)
> > - return copy_mc_fragile(dst, src, len);
> > - if (static_cpu_has(X86_FEATURE_ERMS))
> > - return copy_mc_enhanced_fast_string(dst, src, len);
> > + unsigned long ret;
> > +
> > + if (copy_mc_fragile_enabled) {
> > + instrument_memcpy_before(dst, src, len);
>
> I feel that instrument_memcpy_before() needs to be called *after*
> copy_mc_fragile() etc., because we can't predict how many bytes
> copy_mc_fragile() etc. will actually copy.
That's why we have both _before() and _after(). We can discuss what
checks need to be done before and after the memcpy call, but calling
instrument_memcpy_before() after copy_mc_fragile() is
counterintuitive.
For KMSAN it is indeed important to only handle the `len-ret` bytes that
were actually copied. We want the instrumentation to update the
metadata without triggering an immediate error report, so the update
had better be consistent with what the kernel actually did with the
memory.
But for KASAN/KCSAN we can afford more aggressive checks.
First, if we postpone them after the actual memory accesses happen,
the kernel may panic on the invalid access without a decent error
report.
Second, even if in a particular case only `len-ret` bytes were copied,
the caller probably expected both `src` and `dst` to have `len`
addressable bytes.
Checking for the whole length in this case is more likely to detect a
real error than produce a false positive.
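As a made-up numeric example, suppose a machine check interrupts the copy
after 3584 of 4096 bytes:

  instrument_memcpy_before(dst, src, 4096);     /* KASAN/KCSAN check all 4096 bytes of src and dst */
  ret = copy_mc_fragile(dst, src, 4096);        /* returns 512 bytes left uncopied */
  instrument_memcpy_after(dst, src, 4096, ret); /* kmsan_memmove(dst, src, 3584) */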
On Tue, Mar 19, 2024 at 6:58 PM Linus Torvalds
<[email protected]> wrote:
>
> On Tue, 19 Mar 2024 at 09:37, Alexander Potapenko <[email protected]> wrote:
> >
> > if (copy_mc_fragile_enabled) {
> > __uaccess_begin();
> > + instrument_copy_to_user(dst, src, len);
> > ret = copy_mc_fragile((__force void *)dst, src, len);
> > __uaccess_end();
>
> I'd actually prefer that instrument_copy_to_user() to be *outside* the
> __uaccess_begin.
Good point, this is doable.
>
> In fact, I'm a bit surprised that objtool didn't complain about it in that form.
This is because a bunch of KMSAN functions is ignored by objtool:
https://elixir.bootlin.com/linux/latest/source/tools/objtool/check.c#L1200
> __uaccess_begin() causes the CPU to accept kernel accesses to user
> mode, and I don't think instrument_copy_to_user() has any business
> actually touching user mode memory.
Ack.
> In fact it might be better to rename the function and change the prototype to
>
> instrument_src(src, len);
>
> because you really can't sanely instrument the destination of a user
> copy, but "instrument_src()" might be useful in other situations than
> just user copies.
Right now, at least for KMSAN, it is important to distinguish between a
usercopy and e.g. a URB submission: both are checked using the same
function, but the report title differs depending on what is happening.
The destination parameter is also used by KMSAN to print fancier error reports.
For an infoleak we show the target userspace address together with
other information, e.g.:
BUG: KMSAN: kernel-infoleak in instrument_copy_to_user
include/linux/instrumented.h:114 [inline]
...
Bytes 34-35 of 36 are uninitialized
Memory access of size 36 starts at ffff8881152e5680
Data copied to user address 00007ffc9a4a12a0
It comes in handy when debugging reproducers locally.
Future debugging tools may also need more insight into the semantics
of the instrumented accesses.
On 2024/03/20 18:29, Alexander Potapenko wrote:
> But for KASAN/KCSAN we can afford more aggressive checks.
> First, if we postpone them after the actual memory accesses happen,
> the kernel may panic on the invalid access without a decent error
> report.
> Second, even if in a particular case only `len-ret` bytes were copied,
> the caller probably expected both `src` and `dst` to have `len`
> addressable bytes.
> Checking for the whole length in this case is more likely to detect a
> real error than produce a false positive.
KASAN/KCSAN care about whether the requested address range is accessible but
do not care about whether the requested address range was actually accessed?
By the way, we have the same problem for copy_page() and I was thinking about
https://lkml.kernel.org/r/[email protected] .
But given that instrument_memcpy_{before,after} are added,
how do we want to use instrument_memcpy_{before,after} for copy_page()?
Should we rename the assembly version of copy_page() so that we don't need
to use tricky wrapping like below?
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index cc6b8e087192..b9b794656880 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -9,6 +9,7 @@
#include <asm/alternative.h>
#include <linux/kmsan-checks.h>
+#include <linux/instrumented.h>
/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
@@ -59,6 +60,13 @@ static inline void clear_page(void *page)
}
void copy_page(void *to, void *from);
+#define copy_page(to, from) do { \
+ void *_to = (to); \
+ void *_from = (from); \
+ instrument_memcpy_before(_to, _from, PAGE_SIZE); \
+ copy_page(_to, _from); \
+ instrument_memcpy_after(_to, _from, PAGE_SIZE, 0); \
+} while (0)
#ifdef CONFIG_X86_5LEVEL
/*
On Wed, Mar 20, 2024 at 11:40 AM Tetsuo Handa
<[email protected]> wrote:
>
> On 2024/03/20 18:29, Alexander Potapenko wrote:
> > But for KASAN/KCSAN we can afford more aggressive checks.
> > First, if we postpone them after the actual memory accesses happen,
> > the kernel may panic on the invalid access without a decent error
> > report.
> > Second, even if in a particular case only `len-ret` bytes were copied,
> > the caller probably expected both `src` and `dst` to have `len`
> > addressable bytes.
> > Checking for the whole length in this case is more likely to detect a
> > real error than produce a false positive.
>
> KASAN/KCSAN care about whether the requested address range is accessible but
> do not care about whether the requested address range was actually accessed?
I am not exactly sure under which circumstances a copy_mc may fail,
but let's consider how copy_to_user() is handled.
In instrument_copy_to_user()
(https://elixir.bootlin.com/linux/latest/source/include/linux/instrumented.h#L110)
we check the whole ranges before the copy is performed.
Assume there is buggy code in the kernel that allocates N bytes for
some buffer and then copies N+1 bytes from that buffer to userspace.
If we are (un)lucky enough, the userspace code may always allocate the
destination buffer in a way that prevents copy_to_user() from copying
more than N bytes.
Yet it is possible to provide a userspace buffer that is big enough to
trigger an OOB read in the kernel, and reporting this issue is usually
the right thing to do, even if it does not manifest during testing.
Moreover, if dst can receive N+1 bytes but the OOB read happens to
crash the kernel, performing the check after copying the data would
leave us with a plain panic instead of a KASAN report.
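A sketch of such a bug (illustrative names, not real code):

  char *buf = kmalloc(len, GFP_KERNEL);   /* buf holds len bytes */

  if (!buf)
          return -ENOMEM;
  fill_buffer(buf, len);
  /* instrument_copy_to_user() checks all len + 1 source bytes up front,
   * so KASAN reports the OOB read of buf[len] even if the user buffer
   * happens to be large enough to receive the extra byte. */
  if (copy_to_user(ubuf, buf, len + 1))
          return -EFAULT;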
>
> By the way, we have the same problem for copy_page() and I was thinking about
> https://lkml.kernel.org/r/[email protected] .
> But given that instrument_memcpy_{before,after} are added,
> how do we want to use instrument_memcpy_{before,after} for copy_page() ?
> Should we rename assembly version of copy_page() so that we don't need to use
> tricky wrapping like below?
I think renaming the assembly version and providing a `static inline
void copy_page()` in arch/x86/include/asm/page_64.h would be cleaner,
but it is up to the x86 people to decide.
The patch below seems to work:
============================================
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index cc6b8e087192e..70ee3da32397e 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -8,6 +8,7 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
+#include <linux/instrumented.h>
#include <linux/kmsan-checks.h>
/* duplicated to the one in bootmem.h */
@@ -58,7 +59,14 @@ static inline void clear_page(void *page)
: "cc", "memory", "rax", "rcx");
}
-void copy_page(void *to, void *from);
+void copy_page_asm(void *to, void *from);
+
+static inline void copy_page(void *to, void *from)
+{
+ instrument_memcpy_before(to, from, PAGE_SIZE);
+ copy_page_asm(to, from);
+ instrument_memcpy_after(to, from, PAGE_SIZE, 0);
+}
#ifdef CONFIG_X86_5LEVEL
/*
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index d6ae793d08faf..e65b70406d48a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,13 +13,13 @@
* prefetch distance based on SMP/UP.
*/
ALIGN
-SYM_FUNC_START(copy_page)
+SYM_FUNC_START(copy_page_asm)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
RET
-SYM_FUNC_END(copy_page)
-EXPORT_SYMBOL(copy_page)
+SYM_FUNC_END(copy_page_asm)
+EXPORT_SYMBOL(copy_page_asm)
SYM_FUNC_START_LOCAL(copy_page_regs)
subq $2*8, %rsp
============================================