2024-03-20 10:19:05

by Alexander Potapenko

Subject: [PATCH v2 1/3] mm: kmsan: implement kmsan_memmove()

Provide a hook that can be used by custom memcpy implementations to tell
KMSAN that the metadata needs to be copied. Without that, false positive
reports are possible in the cases where KMSAN fails to intercept memory
initialization.
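
For illustration only (not part of this patch), a minimal sketch of how a
caller with its own non-instrumented copy routine might pair it with the new
hook; my_asm_memcpy() is a hypothetical stand-in for such a routine:

  #include <linux/kmsan-checks.h>

  /* Hypothetical wrapper: the copy itself is invisible to KMSAN. */
  static void copy_buf(void *dst, const void *src, size_t len)
  {
          my_asm_memcpy(dst, src, len);   /* assumed uninstrumented copy */
          kmsan_memmove(dst, src, len);   /* copy shadow/origin metadata too */
  }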

Link: https://lore.kernel.org/all/[email protected]/
Suggested-by: Tetsuo Handa <[email protected]>
Signed-off-by: Alexander Potapenko <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Linus Torvalds <[email protected]>
---
include/linux/kmsan-checks.h | 15 +++++++++++++++
mm/kmsan/hooks.c | 11 +++++++++++
2 files changed, 26 insertions(+)

diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h
index c4cae333deec5..e1082dc40abc2 100644
--- a/include/linux/kmsan-checks.h
+++ b/include/linux/kmsan-checks.h
@@ -61,6 +61,17 @@ void kmsan_check_memory(const void *address, size_t size);
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
size_t left);

+/**
+ * kmsan_memmove() - Notify KMSAN about a data copy within the kernel.
+ * @to: destination address in the kernel.
+ * @from: source address in the kernel.
+ * @size: number of bytes to copy.
+ *
+ * Invoked after a non-instrumented version of memmove()/memcpy() (e.g. one
+ * implemented in assembly) is called, in order to copy KMSAN's metadata.
+ */
+void kmsan_memmove(void *to, const void *from, size_t size);
+
#else

static inline void kmsan_poison_memory(const void *address, size_t size,
@@ -78,6 +89,10 @@ static inline void kmsan_copy_to_user(void __user *to, const void *from,
{
}

+static inline void kmsan_memmove(void *to, const void *from, size_t size)
+{
+}
+
#endif

#endif /* _LINUX_KMSAN_CHECKS_H */
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 5d6e2dee5692a..364f778ee226d 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -285,6 +285,17 @@ void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
}
EXPORT_SYMBOL(kmsan_copy_to_user);

+void kmsan_memmove(void *to, const void *from, size_t size)
+{
+ if (!kmsan_enabled || kmsan_in_runtime())
+ return;
+
+ kmsan_enter_runtime();
+ kmsan_internal_memmove_metadata(to, (void *)from, size);
+ kmsan_leave_runtime();
+}
+EXPORT_SYMBOL(kmsan_memmove);
+
/* Helper function to check an URB. */
void kmsan_handle_urb(const struct urb *urb, bool is_out)
{
--
2.44.0.291.gc1ea87d7ee-goog



2024-03-20 10:19:48

by Alexander Potapenko

Subject: [PATCH v2 2/3] instrumented.h: add instrument_memcpy_before, instrument_memcpy_after

Bug detection tools based on compiler instrumentation may miss memory
accesses in custom memcpy implementations (such as copy_mc_to_kernel).
Provide instrumentation hooks that tell KASAN, KCSAN, and KMSAN about
such accesses.
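
As a usage sketch (illustrative, not from this patch), a wrapper around an
uninstrumented copy routine would bracket it with the two hooks;
copy_bytes_custom() is a hypothetical helper that returns the number of bytes
it failed to copy:

  #include <linux/instrumented.h>

  static unsigned long copy_custom(void *dst, const void *src, unsigned long len)
  {
          unsigned long left;

          instrument_memcpy_before(dst, src, len);      /* KASAN/KCSAN checks up front */
          left = copy_bytes_custom(dst, src, len);      /* hypothetical uninstrumented copy */
          instrument_memcpy_after(dst, src, len, left); /* KMSAN metadata for the copied bytes */
          return left;
  }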

Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Alexander Potapenko <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Tetsuo Handa <[email protected]>
Cc: Linus Torvalds <[email protected]>

---
v2: fix a copypasto in a comment spotted by Linus
---
include/linux/instrumented.h | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)

diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
index 1b608e00290aa..711a1f0d1a735 100644
--- a/include/linux/instrumented.h
+++ b/include/linux/instrumented.h
@@ -147,6 +147,41 @@ instrument_copy_from_user_after(const void *to, const void __user *from,
kmsan_unpoison_memory(to, n - left);
}

+/**
+ * instrument_memcpy_before - add instrumentation before non-instrumented memcpy
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ *
+ * Instrument memory accesses that happen in custom memcpy implementations. The
+ * instrumentation should be inserted before the memcpy call.
+ */
+static __always_inline void instrument_memcpy_before(void *to, const void *from,
+ unsigned long n)
+{
+ kasan_check_write(to, n);
+ kasan_check_read(from, n);
+ kcsan_check_write(to, n);
+ kcsan_check_read(from, n);
+}
+
+/**
+ * instrument_memcpy_after - add instrumentation after non-instrumented memcpy
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ * @left: number of bytes not copied (if known)
+ *
+ * Instrument memory accesses that happen in custom memcpy implementations. The
+ * instrumentation should be inserted after the memcpy call.
+ */
+static __always_inline void instrument_memcpy_after(void *to, const void *from,
+ unsigned long n,
+ unsigned long left)
+{
+ kmsan_memmove(to, from, n - left);
+}
+
/**
* instrument_get_user() - add instrumentation to get_user()-like macros
* @to: destination variable, may not be address-taken
--
2.44.0.291.gc1ea87d7ee-goog


2024-03-20 10:20:38

by Alexander Potapenko

Subject: [PATCH v2 3/3] x86: call instrumentation hooks from copy_mc.c

Memory accesses in copy_mc_to_kernel() and copy_mc_to_user() are performed
by assembly routines and are invisible to KASAN, KCSAN, and KMSAN.
Add hooks from instrumented.h to tell the tools these functions have
memcpy/copy_to_user semantics.

The call to copy_mc_fragile() in copy_mc_fragile_handle_tail() is left
intact, because the latter is only called from the assembly implementation
of copy_mc_fragile(), so the memory accesses in it are covered by the
instrumentation in copy_mc_to_kernel() and copy_mc_to_user().

Link: https://lore.kernel.org/all/[email protected]/
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Alexander Potapenko <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Tetsuo Handa <[email protected]>

---
v2:
- as requested by Linus Torvalds, move the instrumentation outside the
uaccess section
---
arch/x86/lib/copy_mc.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index 6e8b7e600def5..97e88e58567bf 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -4,6 +4,7 @@
#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <linux/instrumented.h>
#include <linux/string.h>
#include <linux/types.h>

@@ -61,10 +62,20 @@ unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned
*/
unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
- if (copy_mc_fragile_enabled)
- return copy_mc_fragile(dst, src, len);
- if (static_cpu_has(X86_FEATURE_ERMS))
- return copy_mc_enhanced_fast_string(dst, src, len);
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_fragile(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
memcpy(dst, src, len);
return 0;
}
@@ -75,6 +86,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
unsigned long ret;

if (copy_mc_fragile_enabled) {
+ instrument_copy_to_user(dst, src, len);
__uaccess_begin();
ret = copy_mc_fragile((__force void *)dst, src, len);
__uaccess_end();
@@ -82,6 +94,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
}

if (static_cpu_has(X86_FEATURE_ERMS)) {
+ instrument_copy_to_user(dst, src, len);
__uaccess_begin();
ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
__uaccess_end();
--
2.44.0.291.gc1ea87d7ee-goog


2024-03-20 16:07:05

by Linus Torvalds

Subject: Re: [PATCH v2 1/3] mm: kmsan: implement kmsan_memmove()

On Wed, 20 Mar 2024 at 03:18, Alexander Potapenko <[email protected]> wrote:
>
> Provide a hook that can be used by custom memcpy implementations to tell
> KMSAN that the metadata needs to be copied. Without that, false positive
> reports are possible in the cases where KMSAN fails to intercept memory
> initialization.

Thanks, the series looks fine to me now with the updated 3/3.

I assume it will go through Andrew's -mm tree?

Linus

2024-03-20 20:03:25

by Alexander Potapenko

Subject: Re: [PATCH v2 1/3] mm: kmsan: implement kmsan_memmove()

On Wed, Mar 20, 2024 at 5:05 PM Linus Torvalds
<[email protected]> wrote:
>
> On Wed, 20 Mar 2024 at 03:18, Alexander Potapenko <[email protected]> wrote:
> >
> > Provide a hook that can be used by custom memcpy implementations to tell
> > KMSAN that the metadata needs to be copied. Without that, false positive
> > reports are possible in the cases where KMSAN fails to intercept memory
> > initialization.
>
> Thanks, the series looks fine to me now with the updated 3/3.
>
> I assume it will go through Andrew's -mm tree?
>
> Linus

Yes, I think this makes the most sense.

2024-03-21 12:22:36

by Marco Elver

Subject: Re: [PATCH v2 1/3] mm: kmsan: implement kmsan_memmove()

On Wed, 20 Mar 2024 at 11:18, Alexander Potapenko <[email protected]> wrote:
>
> Provide a hook that can be used by custom memcpy implementations to tell
> KMSAN that the metadata needs to be copied. Without that, false positive
> reports are possible in the cases where KMSAN fails to intercept memory
> initialization.
>
> Link: https://lore.kernel.org/all/[email protected]/
> Suggested-by: Tetsuo Handa <[email protected]>
> Signed-off-by: Alexander Potapenko <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Marco Elver <[email protected]>
> Cc: Linus Torvalds <[email protected]>

Reviewed-by: Marco Elver <[email protected]>

> ---
> include/linux/kmsan-checks.h | 15 +++++++++++++++
> mm/kmsan/hooks.c | 11 +++++++++++
> 2 files changed, 26 insertions(+)
>
> diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h
> index c4cae333deec5..e1082dc40abc2 100644
> --- a/include/linux/kmsan-checks.h
> +++ b/include/linux/kmsan-checks.h
> @@ -61,6 +61,17 @@ void kmsan_check_memory(const void *address, size_t size);
> void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
> size_t left);
>
> +/**
> + * kmsan_memmove() - Notify KMSAN about a data copy within the kernel.
> + * @to: destination address in the kernel.
> + * @from: source address in the kernel.
> + * @size: number of bytes to copy.
> + *
> + * Invoked after a non-instrumented version of memmove()/memcpy() (e.g. one
> + * implemented in assembly) is called, in order to copy KMSAN's metadata.
> + */
> +void kmsan_memmove(void *to, const void *from, size_t size);
> +
> #else
>
> static inline void kmsan_poison_memory(const void *address, size_t size,
> @@ -78,6 +89,10 @@ static inline void kmsan_copy_to_user(void __user *to, const void *from,
> {
> }
>
> +static inline void kmsan_memmove(void *to, const void *from, size_t size)
> +{
> +}
> +
> #endif
>
> #endif /* _LINUX_KMSAN_CHECKS_H */
> diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
> index 5d6e2dee5692a..364f778ee226d 100644
> --- a/mm/kmsan/hooks.c
> +++ b/mm/kmsan/hooks.c
> @@ -285,6 +285,17 @@ void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
> }
> EXPORT_SYMBOL(kmsan_copy_to_user);
>
> +void kmsan_memmove(void *to, const void *from, size_t size)
> +{
> + if (!kmsan_enabled || kmsan_in_runtime())
> + return;
> +
> + kmsan_enter_runtime();
> + kmsan_internal_memmove_metadata(to, (void *)from, size);
> + kmsan_leave_runtime();
> +}
> +EXPORT_SYMBOL(kmsan_memmove);
> +
> /* Helper function to check an URB. */
> void kmsan_handle_urb(const struct urb *urb, bool is_out)
> {
> --
> 2.44.0.291.gc1ea87d7ee-goog
>

2024-03-21 12:29:26

by Marco Elver

Subject: Re: [PATCH v2 2/3] instrumented.h: add instrument_memcpy_before, instrument_memcpy_after

On Wed, 20 Mar 2024 at 11:19, Alexander Potapenko <[email protected]> wrote:
>
> Bug detection tools based on compiler instrumentation may miss memory
> accesses in custom memcpy implementations (such as copy_mc_to_kernel).
> Provide instrumentation hooks that tell KASAN, KCSAN, and KMSAN about
> such accesses.
>
> Link: https://lore.kernel.org/all/[email protected]/
> Signed-off-by: Alexander Potapenko <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Marco Elver <[email protected]>
> Cc: Tetsuo Handa <[email protected]>
> Cc: Linus Torvalds <[email protected]>

Reviewed-by: Marco Elver <[email protected]>

> ---
> v2: fix a copypasto in a comment spotted by Linus
> ---
> include/linux/instrumented.h | 35 +++++++++++++++++++++++++++++++++++
> 1 file changed, 35 insertions(+)
>
> diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
> index 1b608e00290aa..711a1f0d1a735 100644
> --- a/include/linux/instrumented.h
> +++ b/include/linux/instrumented.h
> @@ -147,6 +147,41 @@ instrument_copy_from_user_after(const void *to, const void __user *from,
> kmsan_unpoison_memory(to, n - left);
> }
>
> +/**
> + * instrument_memcpy_before - add instrumentation before non-instrumented memcpy
> + * @to: destination address
> + * @from: source address
> + * @n: number of bytes to copy
> + *
> + * Instrument memory accesses that happen in custom memcpy implementations. The
> + * instrumentation should be inserted before the memcpy call.
> + */
> +static __always_inline void instrument_memcpy_before(void *to, const void *from,
> + unsigned long n)
> +{
> + kasan_check_write(to, n);
> + kasan_check_read(from, n);
> + kcsan_check_write(to, n);
> + kcsan_check_read(from, n);
> +}
> +
> +/**
> + * instrument_memcpy_after - add instrumentation after non-instrumented memcpy
> + * @to: destination address
> + * @from: source address
> + * @n: number of bytes to copy
> + * @left: number of bytes not copied (if known)
> + *
> + * Instrument memory accesses that happen in custom memcpy implementations. The
> + * instrumentation should be inserted after the memcpy call.
> + */
> +static __always_inline void instrument_memcpy_after(void *to, const void *from,
> + unsigned long n,
> + unsigned long left)
> +{
> + kmsan_memmove(to, from, n - left);
> +}
> +
> /**
> * instrument_get_user() - add instrumentation to get_user()-like macros
> * @to: destination variable, may not be address-taken
> --
> 2.44.0.291.gc1ea87d7ee-goog
>

2024-03-21 12:31:27

by Marco Elver

Subject: Re: [PATCH v2 3/3] x86: call instrumentation hooks from copy_mc.c

On Wed, 20 Mar 2024 at 11:19, Alexander Potapenko <[email protected]> wrote:
>
> Memory accesses in copy_mc_to_kernel() and copy_mc_to_user() are performed
> by assembly routines and are invisible to KASAN, KCSAN, and KMSAN.
> Add hooks from instrumented.h to tell the tools these functions have
> memcpy/copy_to_user semantics.
>
> The call to copy_mc_fragile() in copy_mc_fragile_handle_tail() is left
> intact, because the latter is only called from the assembly implementation
> of copy_mc_fragile(), so the memory accesses in it are covered by the
> instrumentation in copy_mc_to_kernel() and copy_mc_to_user().
>
> Link: https://lore.kernel.org/all/[email protected]/
> Suggested-by: Linus Torvalds <[email protected]>
> Signed-off-by: Alexander Potapenko <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Marco Elver <[email protected]>
> Cc: Tetsuo Handa <[email protected]>

Reviewed-by: Marco Elver <[email protected]>

> ---
> v2:
> - as requested by Linus Torvalds, move the instrumentation outside the
> uaccess section
> ---
> arch/x86/lib/copy_mc.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
> index 6e8b7e600def5..97e88e58567bf 100644
> --- a/arch/x86/lib/copy_mc.c
> +++ b/arch/x86/lib/copy_mc.c
> @@ -4,6 +4,7 @@
> #include <linux/jump_label.h>
> #include <linux/uaccess.h>
> #include <linux/export.h>
> +#include <linux/instrumented.h>
> #include <linux/string.h>
> #include <linux/types.h>
>
> @@ -61,10 +62,20 @@ unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned
> */
> unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
> {
> - if (copy_mc_fragile_enabled)
> - return copy_mc_fragile(dst, src, len);
> - if (static_cpu_has(X86_FEATURE_ERMS))
> - return copy_mc_enhanced_fast_string(dst, src, len);
> + unsigned long ret;
> +
> + if (copy_mc_fragile_enabled) {
> + instrument_memcpy_before(dst, src, len);
> + ret = copy_mc_fragile(dst, src, len);
> + instrument_memcpy_after(dst, src, len, ret);
> + return ret;
> + }
> + if (static_cpu_has(X86_FEATURE_ERMS)) {
> + instrument_memcpy_before(dst, src, len);
> + ret = copy_mc_enhanced_fast_string(dst, src, len);
> + instrument_memcpy_after(dst, src, len, ret);
> + return ret;
> + }
> memcpy(dst, src, len);
> return 0;
> }
> @@ -75,6 +86,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
> unsigned long ret;
>
> if (copy_mc_fragile_enabled) {
> + instrument_copy_to_user(dst, src, len);
> __uaccess_begin();
> ret = copy_mc_fragile((__force void *)dst, src, len);
> __uaccess_end();
> @@ -82,6 +94,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
> }
>
> if (static_cpu_has(X86_FEATURE_ERMS)) {
> + instrument_copy_to_user(dst, src, len);
> __uaccess_begin();
> ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
> __uaccess_end();
> --
> 2.44.0.291.gc1ea87d7ee-goog
>