From: Andrey Konovalov <[email protected]>
mm/kasan/kasan.h provides a number of empty defines for a few
arch-specific tagging-related routines, in case the architecture code
didn't define them.
The original idea was to simplify integration in case another architecture
starts supporting memory tagging. However, right now, if any of those
routines are not provided by an architecture, Hardware Tag-Based KASAN
won't work.
Drop the empty defines, as it would be better to get compiler errors
rather than runtime crashes when adding support for a new architecture.
Also drop the empty hw_enable_tagging_sync/async/asymm defines for the
!CONFIG_KASAN_HW_TAGS case, as those are only used in mm/kasan/hw_tags.c.
Signed-off-by: Andrey Konovalov <[email protected]>
---
mm/kasan/kasan.h | 26 --------------------------
1 file changed, 26 deletions(-)
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index a61eeee3095a..b1895526d02f 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -395,28 +395,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#ifdef CONFIG_KASAN_HW_TAGS
-#ifndef arch_enable_tagging_sync
-#define arch_enable_tagging_sync()
-#endif
-#ifndef arch_enable_tagging_async
-#define arch_enable_tagging_async()
-#endif
-#ifndef arch_enable_tagging_asymm
-#define arch_enable_tagging_asymm()
-#endif
-#ifndef arch_force_async_tag_fault
-#define arch_force_async_tag_fault()
-#endif
-#ifndef arch_get_random_tag
-#define arch_get_random_tag() (0xFF)
-#endif
-#ifndef arch_get_mem_tag
-#define arch_get_mem_tag(addr) (0xFF)
-#endif
-#ifndef arch_set_mem_tag_range
-#define arch_set_mem_tag_range(addr, size, tag, init) ((void *)(addr))
-#endif
-
#define hw_enable_tagging_sync() arch_enable_tagging_sync()
#define hw_enable_tagging_async() arch_enable_tagging_async()
#define hw_enable_tagging_asymm() arch_enable_tagging_asymm()
@@ -430,10 +408,6 @@ void kasan_enable_tagging(void);
#else /* CONFIG_KASAN_HW_TAGS */
-#define hw_enable_tagging_sync()
-#define hw_enable_tagging_async()
-#define hw_enable_tagging_asymm()
-
static inline void kasan_enable_tagging(void) { }
#endif /* CONFIG_KASAN_HW_TAGS */
--
2.25.1
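For context, once these fallbacks are gone, an architecture that enables
CONFIG_KASAN_HW_TAGS has to provide every hook itself or the build fails.
Below is a minimal sketch of the required set; the macro names are exactly
the ones whose empty fallbacks are dropped above, while the my_arch_*()
helpers are hypothetical placeholders, not real functions:

#include <linux/types.h>

/* Hypothetical arch helpers; names invented for illustration only. */
void my_arch_enable_sync(void);
void my_arch_enable_async(void);
void my_arch_enable_asymm(void);
void my_arch_force_async_fault(void);
u8 my_arch_random_tag(void);
u8 my_arch_mem_tag(const void *addr);
void *my_arch_set_tag_range(const void *addr, size_t size, u8 tag,
                            bool init);

/* The full hook set mm/kasan/hw_tags.c now expects from the arch. */
#define arch_enable_tagging_sync()      my_arch_enable_sync()
#define arch_enable_tagging_async()     my_arch_enable_async()
#define arch_enable_tagging_asymm()     my_arch_enable_asymm()
#define arch_force_async_tag_fault()    my_arch_force_async_fault()
#define arch_get_random_tag()           my_arch_random_tag()
#define arch_get_mem_tag(addr)          my_arch_mem_tag(addr)
#define arch_set_mem_tag_range(addr, size, tag, init) \
        my_arch_set_tag_range((addr), (size), (tag), (init))

A missing hook now shows up as a compile error in mm/kasan/hw_tags.c
instead of a silently broken runtime, which is the point of the patch.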
From: Andrey Konovalov <[email protected]>
Rename arch_enable_tagging_sync/async/asymm to
arch_enable_tag_checks_sync/async/asymm, as the new names better reflect
the function of these routines.
Also rename kasan_enable_tagging to kasan_enable_hw_tags for the same
reason.
Signed-off-by: Andrey Konovalov <[email protected]>
---
arch/arm64/include/asm/memory.h | 6 +++---
mm/kasan/hw_tags.c | 12 ++++++------
mm/kasan/kasan.h | 10 +++++-----
mm/kasan/kasan_test.c | 2 +-
4 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 78e5163836a0..faf42bff9a60 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -261,9 +261,9 @@ static inline const void *__tag_set(const void *addr, u8 tag)
}
#ifdef CONFIG_KASAN_HW_TAGS
-#define arch_enable_tagging_sync() mte_enable_kernel_sync()
-#define arch_enable_tagging_async() mte_enable_kernel_async()
-#define arch_enable_tagging_asymm() mte_enable_kernel_asymm()
+#define arch_enable_tag_checks_sync() mte_enable_kernel_sync()
+#define arch_enable_tag_checks_async() mte_enable_kernel_async()
+#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm()
#define arch_force_async_tag_fault() mte_check_tfsr_exit()
#define arch_get_random_tag() mte_get_random_tag()
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index d1bcb0205327..b092e37b69a7 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -205,7 +205,7 @@ void kasan_init_hw_tags_cpu(void)
* Enable async or asymm modes only when explicitly requested
* through the command line.
*/
- kasan_enable_tagging();
+ kasan_enable_hw_tags();
}
/* kasan_init_hw_tags() is called once on boot CPU. */
@@ -373,19 +373,19 @@ void __kasan_poison_vmalloc(const void *start, unsigned long size)
#endif
-void kasan_enable_tagging(void)
+void kasan_enable_hw_tags(void)
{
if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
- hw_enable_tagging_async();
+ hw_enable_tag_checks_async();
else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM)
- hw_enable_tagging_asymm();
+ hw_enable_tag_checks_asymm();
else
- hw_enable_tagging_sync();
+ hw_enable_tag_checks_sync();
}
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
-EXPORT_SYMBOL_GPL(kasan_enable_tagging);
+EXPORT_SYMBOL_GPL(kasan_enable_hw_tags);
void kasan_force_async_fault(void)
{
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index b1895526d02f..a1613f5d7608 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -395,20 +395,20 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#ifdef CONFIG_KASAN_HW_TAGS
-#define hw_enable_tagging_sync() arch_enable_tagging_sync()
-#define hw_enable_tagging_async() arch_enable_tagging_async()
-#define hw_enable_tagging_asymm() arch_enable_tagging_asymm()
+#define hw_enable_tag_checks_sync() arch_enable_tag_checks_sync()
+#define hw_enable_tag_checks_async() arch_enable_tag_checks_async()
+#define hw_enable_tag_checks_asymm() arch_enable_tag_checks_asymm()
#define hw_force_async_tag_fault() arch_force_async_tag_fault()
#define hw_get_random_tag() arch_get_random_tag()
#define hw_get_mem_tag(addr) arch_get_mem_tag(addr)
#define hw_set_mem_tag_range(addr, size, tag, init) \
arch_set_mem_tag_range((addr), (size), (tag), (init))
-void kasan_enable_tagging(void);
+void kasan_enable_hw_tags(void);
#else /* CONFIG_KASAN_HW_TAGS */
-static inline void kasan_enable_tagging(void) { }
+static inline void kasan_enable_hw_tags(void) { }
#endif /* CONFIG_KASAN_HW_TAGS */
diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index 627eaf1ee1db..a375776f9896 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -148,7 +148,7 @@ static void kasan_test_exit(struct kunit *test)
kasan_sync_fault_possible()) { \
if (READ_ONCE(test_status.report_found) && \
!READ_ONCE(test_status.async_fault)) \
- kasan_enable_tagging(); \
+ kasan_enable_hw_tags(); \
migrate_enable(); \
} \
WRITE_ONCE(test_status.report_found, false); \
--
2.25.1
From: Andrey Konovalov <[email protected]>
Add two new tagging-related routines, arch_suppress_tag_checks_start/stop,
that suppress MTE tag checking via the TCO register.
These routines are used later in this series.
Signed-off-by: Andrey Konovalov <[email protected]>
---
arch/arm64/include/asm/memory.h | 2 ++
mm/kasan/kasan.h | 2 ++
2 files changed, 4 insertions(+)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index faf42bff9a60..26bd4d9aa401 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -264,6 +264,8 @@ static inline const void *__tag_set(const void *addr, u8 tag)
#define arch_enable_tag_checks_sync() mte_enable_kernel_sync()
#define arch_enable_tag_checks_async() mte_enable_kernel_async()
#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm()
+#define arch_suppress_tag_checks_start() __mte_enable_tco()
+#define arch_suppress_tag_checks_stop() __mte_disable_tco()
#define arch_force_async_tag_fault() mte_check_tfsr_exit()
#define arch_get_random_tag() mte_get_random_tag()
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index a1613f5d7608..f5e4f5f2ba20 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -398,6 +398,8 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#define hw_enable_tag_checks_sync() arch_enable_tag_checks_sync()
#define hw_enable_tag_checks_async() arch_enable_tag_checks_async()
#define hw_enable_tag_checks_asymm() arch_enable_tag_checks_asymm()
+#define hw_suppress_tag_checks_start() arch_suppress_tag_checks_start()
+#define hw_suppress_tag_checks_stop() arch_suppress_tag_checks_stop()
#define hw_force_async_tag_fault() arch_force_async_tag_fault()
#define hw_get_random_tag() arch_get_random_tag()
#define hw_get_mem_tag(addr) arch_get_mem_tag(addr)
--
2.25.1
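To make the wiring concrete, here is a sketch of how a suppression window
is intended to nest through these layers. The caller shown is illustrative
only; the __mte_*_tco() helpers the macros resolve to are the renamed TCO
routines from the next patch in this series:

/*
 * Illustrative only: a tag-check suppression window as wired up by
 * this patch. hw_suppress_tag_checks_start() expands to
 * arch_suppress_tag_checks_start(), which on arm64 is __mte_enable_tco().
 */
static void example_suppression_window(void)
{
        hw_suppress_tag_checks_start(); /* sets PSTATE.TCO */

        /* Memory accesses here do not generate MTE tag check faults. */

        hw_suppress_tag_checks_stop();  /* clears PSTATE.TCO */
}

Since PSTATE.TCO suppresses checks only for the current CPU's execution
context, callers are expected to pin the window to one CPU, which is what
the final patch in the series does around report printing.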
From: Vincenzo Frascino <[email protected]>
The TCO-related routines are used in uaccess methods and
load_unaligned_zeropad(), but they are unrelated to both even if the
naming suggests otherwise.
Improve the readability of the code by moving them away from uaccess.h
and prepending them with "mte".
Cc: Will Deacon <[email protected]>
Signed-off-by: Vincenzo Frascino <[email protected]>
Signed-off-by: Catalin Marinas <[email protected]>
Signed-off-by: Andrey Konovalov <[email protected]>
---
arch/arm64/include/asm/mte-kasan.h | 81 +++++++++++++++++++++++++
arch/arm64/include/asm/mte.h | 12 ----
arch/arm64/include/asm/uaccess.h | 66 +++-----------------
arch/arm64/include/asm/word-at-a-time.h | 4 +-
4 files changed, 93 insertions(+), 70 deletions(-)
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 9f79425fc65a..cc9e74876e9a 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -13,8 +13,73 @@
#include <linux/types.h>
+#ifdef CONFIG_KASAN_HW_TAGS
+
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return static_branch_unlikely(&mte_async_or_asymm_mode);
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
#ifdef CONFIG_ARM64_MTE
+/*
+ * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
+ * affects EL0 and TCF affects EL1 irrespective of which TTBR is
+ * used.
+ * The kernel accesses TTBR0 usually with LDTR/STTR instructions
+ * when UAO is available, so these would act as EL0 accesses using
+ * TCF0.
+ * However futex.h code uses exclusives which would be executed as
+ * EL1, this can potentially cause a tag check fault even if the
+ * user disables TCF0.
+ *
+ * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
+ * and reset it in uaccess_disable().
+ *
+ * The Tag check override (TCO) bit disables temporarily the tag checking
+ * preventing the issue.
+ */
+static inline void __mte_disable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void __mte_enable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+/*
+ * These functions disable tag checking only if in MTE async mode
+ * since the sync mode generates exceptions synchronously and the
+ * nofault or load_unaligned_zeropad can handle them.
+ */
+static inline void __mte_disable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ __mte_disable_tco();
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ __mte_enable_tco();
+}
+
/*
* These functions are meant to be only used from KASAN runtime through
* the arch_*() interface defined in asm/memory.h.
@@ -138,6 +203,22 @@ void mte_enable_kernel_asymm(void);
#else /* CONFIG_ARM64_MTE */
+static inline void __mte_disable_tco(void)
+{
+}
+
+static inline void __mte_enable_tco(void)
+{
+}
+
+static inline void __mte_disable_tco_async(void)
+{
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+}
+
static inline u8 mte_get_ptr_tag(void *ptr)
{
return 0xFF;
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 20dd06d70af5..c028afb1cd0b 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -178,14 +178,6 @@ static inline void mte_disable_tco_entry(struct task_struct *task)
}
#ifdef CONFIG_KASAN_HW_TAGS
-/* Whether the MTE asynchronous mode is enabled. */
-DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
-
-static inline bool system_uses_mte_async_or_asymm_mode(void)
-{
- return static_branch_unlikely(&mte_async_or_asymm_mode);
-}
-
void mte_check_tfsr_el1(void);
static inline void mte_check_tfsr_entry(void)
@@ -212,10 +204,6 @@ static inline void mte_check_tfsr_exit(void)
mte_check_tfsr_el1();
}
#else
-static inline bool system_uses_mte_async_or_asymm_mode(void)
-{
- return false;
-}
static inline void mte_check_tfsr_el1(void)
{
}
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 5c7b2f9d5913..057ec1882326 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -136,55 +136,9 @@ static inline void __uaccess_enable_hw_pan(void)
CONFIG_ARM64_PAN));
}
-/*
- * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
- * affects EL0 and TCF affects EL1 irrespective of which TTBR is
- * used.
- * The kernel accesses TTBR0 usually with LDTR/STTR instructions
- * when UAO is available, so these would act as EL0 accesses using
- * TCF0.
- * However futex.h code uses exclusives which would be executed as
- * EL1, this can potentially cause a tag check fault even if the
- * user disables TCF0.
- *
- * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
- * and reset it in uaccess_disable().
- *
- * The Tag check override (TCO) bit disables temporarily the tag checking
- * preventing the issue.
- */
-static inline void __uaccess_disable_tco(void)
-{
- asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
- ARM64_MTE, CONFIG_KASAN_HW_TAGS));
-}
-
-static inline void __uaccess_enable_tco(void)
-{
- asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
- ARM64_MTE, CONFIG_KASAN_HW_TAGS));
-}
-
-/*
- * These functions disable tag checking only if in MTE async mode
- * since the sync mode generates exceptions synchronously and the
- * nofault or load_unaligned_zeropad can handle them.
- */
-static inline void __uaccess_disable_tco_async(void)
-{
- if (system_uses_mte_async_or_asymm_mode())
- __uaccess_disable_tco();
-}
-
-static inline void __uaccess_enable_tco_async(void)
-{
- if (system_uses_mte_async_or_asymm_mode())
- __uaccess_enable_tco();
-}
-
static inline void uaccess_disable_privileged(void)
{
- __uaccess_disable_tco();
+ __mte_disable_tco();
if (uaccess_ttbr0_disable())
return;
@@ -194,7 +148,7 @@ static inline void uaccess_disable_privileged(void)
static inline void uaccess_enable_privileged(void)
{
- __uaccess_enable_tco();
+ __mte_enable_tco();
if (uaccess_ttbr0_enable())
return;
@@ -302,8 +256,8 @@ do { \
#define get_user __get_user
/*
- * We must not call into the scheduler between __uaccess_enable_tco_async() and
- * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
* functions, we must evaluate these outside of the critical section.
*/
#define __get_kernel_nofault(dst, src, type, err_label) \
@@ -312,10 +266,10 @@ do { \
__typeof__(src) __gkn_src = (src); \
int __gkn_err = 0; \
\
- __uaccess_enable_tco_async(); \
+ __mte_enable_tco_async(); \
__raw_get_mem("ldr", *((type *)(__gkn_dst)), \
(__force type *)(__gkn_src), __gkn_err, K); \
- __uaccess_disable_tco_async(); \
+ __mte_disable_tco_async(); \
\
if (unlikely(__gkn_err)) \
goto err_label; \
@@ -388,8 +342,8 @@ do { \
#define put_user __put_user
/*
- * We must not call into the scheduler between __uaccess_enable_tco_async() and
- * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
* functions, we must evaluate these outside of the critical section.
*/
#define __put_kernel_nofault(dst, src, type, err_label) \
@@ -398,10 +352,10 @@ do { \
__typeof__(src) __pkn_src = (src); \
int __pkn_err = 0; \
\
- __uaccess_enable_tco_async(); \
+ __mte_enable_tco_async(); \
__raw_put_mem("str", *((type *)(__pkn_src)), \
(__force type *)(__pkn_dst), __pkn_err, K); \
- __uaccess_disable_tco_async(); \
+ __mte_disable_tco_async(); \
\
if (unlikely(__pkn_err)) \
goto err_label; \
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 1c8e4f2490bf..f3b151ed0d7a 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -55,7 +55,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret;
- __uaccess_enable_tco_async();
+ __mte_enable_tco_async();
/* Load word from unaligned pointer addr */
asm(
@@ -65,7 +65,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
: "=&r" (ret)
: "r" (addr), "Q" (*(unsigned long *)addr));
- __uaccess_disable_tco_async();
+ __mte_disable_tco_async();
return ret;
}
--
2.25.1
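As a usage note, the key invariant is the one called out in the comments
above: nothing between the async enable/disable pair may call into the
scheduler. A stripped-down sketch of the pattern used by
__get_kernel_nofault() follows; example_nofault_read() is hypothetical,
and the real code uses __raw_get_mem() with an exception-table fixup
rather than a plain dereference:

/*
 * Sketch of the nofault pattern from the hunks above. The _async
 * variants are no-ops in MTE sync mode, where the synchronous tag
 * check fault is handled by the exception-table fixup instead.
 * Nothing here may call into the scheduler.
 */
static unsigned long example_nofault_read(const unsigned long *src)
{
        unsigned long val;

        __mte_enable_tco_async();
        val = *src;             /* real code: __raw_get_mem() w/ fixup */
        __mte_disable_tco_async();

        return val;
}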
From: Andrey Konovalov <[email protected]>
KASAN suppresses reports for bad accesses done by the KASAN reporting
code. The reporting code might access poisoned memory for reporting
purposes.
Software KASAN modes do this by suppressing reports during reporting
via current->kasan_depth, the same way they suppress reports during
accesses to poisoned slab metadata.
Hardware Tag-Based KASAN does not use current->kasan_depth, and instead
resets pointer tags for accesses to poisoned memory done by the reporting
code.
Despite that, a recursive report can still happen:
1. On hardware with faulty MTE support. This was observed by Weizhao
Ouyang on faulty hardware that caused memory tags to randomly change
from time to time.
2. Theoretically, due to a previous MTE-undetected memory corruption.
A recursive report can happen via:
1. Accessing a pointer with a non-reset tag in the reporting code, e.g.
slab->slab_cache, which is what Weizhao Ouyang observed.
2. Theoretically, via external non-annotated routines, e.g. stackdepot.
Resetting tags for all of the pointers in the reporting code and in all
the external routines it uses would be impractical.
Instead, disable tag checking done by the CPU for the duration of KASAN
reporting for Hardware Tag-Based KASAN.
Without this fix, Hardware Tag-Based KASAN reporting code might deadlock.
Fixes: 2e903b914797 ("kasan, arm64: implement HW_TAGS runtime")
Reported-by: Weizhao Ouyang <[email protected]>
Signed-off-by: Andrey Konovalov <[email protected]>
---
Considering that 1. the bug this patch fixes was only observed on faulty
MTE hardware, and 2. the patch depends on the other patches in this series,
I don't think it's worth backporting it into stable.
---
mm/kasan/report.c | 59 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 48 insertions(+), 11 deletions(-)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 89078f912827..77a88d85c0a7 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -72,10 +72,18 @@ static int __init kasan_set_multi_shot(char *str)
__setup("kasan_multi_shot", kasan_set_multi_shot);
/*
- * Used to suppress reports within kasan_disable/enable_current() critical
- * sections, which are used for marking accesses to slab metadata.
+ * This function is used to check whether KASAN reports are suppressed for
+ * software KASAN modes via kasan_disable/enable_current() critical sections.
+ *
+ * This is done to avoid:
+ * 1. False-positive reports when accessing slab metadata,
+ * 2. Deadlocking when poisoned memory is accessed by the reporting code.
+ *
+ * Hardware Tag-Based KASAN instead relies on:
+ * For #1: Resetting tags via kasan_reset_tag().
+ * For #2: Supression of tag checks via CPU, see report_suppress_start/end().
*/
-static bool report_suppressed(void)
+static bool report_suppressed_sw(void)
{
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
if (current->kasan_depth)
@@ -84,6 +92,30 @@ static bool report_suppressed(void)
return false;
}
+static void report_suppress_start(void)
+{
+#ifdef CONFIG_KASAN_HW_TAGS
+ /*
+ * Disable migration for the duration of printing a KASAN report, as
+ * hw_suppress_tag_checks_start() disables checks on the current CPU.
+ */
+ migrate_disable();
+ hw_suppress_tag_checks_start();
+#else
+ kasan_disable_current();
+#endif
+}
+
+static void report_suppress_stop(void)
+{
+#ifdef CONFIG_KASAN_HW_TAGS
+ hw_suppress_tag_checks_stop();
+ migrate_enable();
+#else
+ kasan_enable_current();
+#endif
+}
+
/*
* Used to avoid reporting more than one KASAN bug unless kasan_multi_shot
* is enabled. Note that KASAN tests effectively enable kasan_multi_shot
@@ -174,7 +206,7 @@ static void start_report(unsigned long *flags, bool sync)
/* Do not allow LOCKDEP mangling KASAN reports. */
lockdep_off();
/* Make sure we don't end up in loop. */
- kasan_disable_current();
+ report_suppress_start();
spin_lock_irqsave(&report_lock, *flags);
pr_err("==================================================================\n");
}
@@ -192,7 +224,7 @@ static void end_report(unsigned long *flags, void *addr)
panic("kasan.fault=panic set ...\n");
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
lockdep_on();
- kasan_enable_current();
+ report_suppress_stop();
}
static void print_error_description(struct kasan_report_info *info)
@@ -480,9 +512,13 @@ void kasan_report_invalid_free(void *ptr, unsigned long ip, enum kasan_report_ty
struct kasan_report_info info;
/*
- * Do not check report_suppressed(), as an invalid-free cannot be
- * caused by accessing slab metadata and thus should not be
- * suppressed by kasan_disable/enable_current() critical sections.
+ * Do not check report_suppressed_sw(), as an invalid-free cannot be
+ * caused by accessing poisoned memory and thus should not be suppressed
+ * by kasan_disable/enable_current() critical sections.
+ *
+ * Note that for Hardware Tag-Based KASAN, kasan_report_invalid_free()
+ * is triggered by explicit tag checks and not by the ones performed by
+ * the CPU. Thus, reporting invalid-free is not suppressed as well.
*/
if (unlikely(!report_enabled()))
return;
@@ -517,7 +553,7 @@ bool kasan_report(unsigned long addr, size_t size, bool is_write,
unsigned long irq_flags;
struct kasan_report_info info;
- if (unlikely(report_suppressed()) || unlikely(!report_enabled())) {
+ if (unlikely(report_suppressed_sw()) || unlikely(!report_enabled())) {
ret = false;
goto out;
}
@@ -549,8 +585,9 @@ void kasan_report_async(void)
unsigned long flags;
/*
- * Do not check report_suppressed(), as kasan_disable/enable_current()
- * critical sections do not affect Hardware Tag-Based KASAN.
+ * Do not check report_suppressed_sw(), as
+ * kasan_disable/enable_current() critical sections do not affect
+ * Hardware Tag-Based KASAN.
*/
if (unlikely(!report_enabled()))
return;
--
2.25.1
On Sat, 11 Mar 2023 at 00:43, <[email protected]> wrote:
>
> From: Andrey Konovalov <[email protected]>
>
> KASAN suppresses reports for bad accesses done by the KASAN reporting
> code. The reporting code might access poisoned memory for reporting
> purposes.
>
> Software KASAN modes do this by suppressing reports during reporting
> via current->kasan_depth, the same way they suppress reports during
> accesses to poisoned slab metadata.
>
> Hardware Tag-Based KASAN does not use current->kasan_depth, and instead
> resets pointer tags for accesses to poisoned memory done by the reporting
> code.
>
> Despite that, a recursive report can still happen:
>
> 1. On hardware with faulty MTE support. This was observed by Weizhao
> Ouyang on faulty hardware that caused memory tags to randomly change
> from time to time.
>
> 2. Theoretically, due to a previous MTE-undetected memory corruption.
>
> A recursive report can happen via:
>
> 1. Accessing a pointer with a non-reset tag in the reporting code, e.g.
> slab->slab_cache, which is what Weizhao Ouyang observed.
>
> 2. Theoretically, via external non-annotated routines, e.g. stackdepot.
>
> Resetting tags for all of the pointers in the reporting code and in
> all the external routines it uses would be impractical.
>
> Instead, disable tag checking done by the CPU for the duration of KASAN
> reporting for Hardware Tag-Based KASAN.
>
> Without this fix, Hardware Tag-Based KASAN reporting code might deadlock.
>
> Fixes: 2e903b914797 ("kasan, arm64: implement HW_TAGS runtime")
> Reported-by: Weizhao Ouyang <[email protected]>
> Signed-off-by: Andrey Konovalov <[email protected]>
>
> ---
>
> Considering that 1. the bug this patch fixes was only observed on faulty
> MTE hardware, and 2. the patch depends on the other patches in this series,
> I don't think it's worth backporting it into stable.
> ---
> mm/kasan/report.c | 59 ++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 48 insertions(+), 11 deletions(-)
>
> diff --git a/mm/kasan/report.c b/mm/kasan/report.c
> index 89078f912827..77a88d85c0a7 100644
> --- a/mm/kasan/report.c
> +++ b/mm/kasan/report.c
> @@ -72,10 +72,18 @@ static int __init kasan_set_multi_shot(char *str)
> __setup("kasan_multi_shot", kasan_set_multi_shot);
>
> /*
> - * Used to suppress reports within kasan_disable/enable_current() critical
> - * sections, which are used for marking accesses to slab metadata.
> + * This function is used to check whether KASAN reports are suppressed for
> + * software KASAN modes via kasan_disable/enable_current() critical sections.
> + *
> + * This is done to avoid:
> + * 1. False-positive reports when accessing slab metadata,
> + * 2. Deadlocking when poisoned memory is accessed by the reporting code.
> + *
> + * Hardware Tag-Based KASAN instead relies on:
> + * For #1: Resetting tags via kasan_reset_tag().
> + * For #2: Supression of tag checks via CPU, see report_suppress_start/end().
Typo: "Suppression"
> */
> -static bool report_suppressed(void)
> +static bool report_suppressed_sw(void)
> {
> #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
> if (current->kasan_depth)
> @@ -84,6 +92,30 @@ static bool report_suppressed(void)
> return false;
> }
>
> +static void report_suppress_start(void)
> +{
> +#ifdef CONFIG_KASAN_HW_TAGS
> + /*
> + * Disable migration for the duration of printing a KASAN report, as
> + * hw_suppress_tag_checks_start() disables checks on the current CPU.
> + */
> + migrate_disable();
This still allows this task to be preempted by another task. If the
other task is scheduled in right after hw_suppress_tag_checks_start()
> then there won't be any tag checking in that task. If HW-tags KASAN is
> used as a mitigation technique, that may unnecessarily weaken KASAN.
> And right after report_suppress_start(), the code does
> spin_lock_irqsave(), which disables interrupts (and thereby preemption)
> anyway, so disabling preemption here costs nothing.
> Why not just use preempt_disable()?
> + hw_suppress_tag_checks_start();
> +#else
> + kasan_disable_current();
> +#endif
> +}
> +
> +static void report_suppress_stop(void)
> +{
> +#ifdef CONFIG_KASAN_HW_TAGS
> + hw_suppress_tag_checks_stop();
> + migrate_enable();
> +#else
> + kasan_enable_current();
> +#endif
> +}
> +
> /*
> * Used to avoid reporting more than one KASAN bug unless kasan_multi_shot
> * is enabled. Note that KASAN tests effectively enable kasan_multi_shot
> @@ -174,7 +206,7 @@ static void start_report(unsigned long *flags, bool sync)
> /* Do not allow LOCKDEP mangling KASAN reports. */
> lockdep_off();
> /* Make sure we don't end up in loop. */
> - kasan_disable_current();
> + report_suppress_start();
> spin_lock_irqsave(&report_lock, *flags);
> pr_err("==================================================================\n");
> }
> @@ -192,7 +224,7 @@ static void end_report(unsigned long *flags, void *addr)
> panic("kasan.fault=panic set ...\n");
> add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> lockdep_on();
> - kasan_enable_current();
> + report_suppress_stop();
> }
>
> static void print_error_description(struct kasan_report_info *info)
> @@ -480,9 +512,13 @@ void kasan_report_invalid_free(void *ptr, unsigned long ip, enum kasan_report_ty
> struct kasan_report_info info;
>
> /*
> - * Do not check report_suppressed(), as an invalid-free cannot be
> - * caused by accessing slab metadata and thus should not be
> - * suppressed by kasan_disable/enable_current() critical sections.
> + * Do not check report_suppressed_sw(), as an invalid-free cannot be
> + * caused by accessing poisoned memory and thus should not be suppressed
> + * by kasan_disable/enable_current() critical sections.
> + *
> + * Note that for Hardware Tag-Based KASAN, kasan_report_invalid_free()
> + * is triggered by explicit tag checks and not by the ones performed by
> + * the CPU. Thus, reporting invalid-free is not suppressed as well.
> */
> if (unlikely(!report_enabled()))
> return;
> @@ -517,7 +553,7 @@ bool kasan_report(unsigned long addr, size_t size, bool is_write,
> unsigned long irq_flags;
> struct kasan_report_info info;
>
> - if (unlikely(report_suppressed()) || unlikely(!report_enabled())) {
> + if (unlikely(report_suppressed_sw()) || unlikely(!report_enabled())) {
> ret = false;
> goto out;
> }
> @@ -549,8 +585,9 @@ void kasan_report_async(void)
> unsigned long flags;
>
> /*
> - * Do not check report_suppressed(), as kasan_disable/enable_current()
> - * critical sections do not affect Hardware Tag-Based KASAN.
> + * Do not check report_suppressed_sw(), as
> + * kasan_disable/enable_current() critical sections do not affect
> + * Hardware Tag-Based KASAN.
> */
> if (unlikely(!report_enabled()))
> return;
> --
> 2.25.1
>
On Mon, Mar 13, 2023 at 12:20 PM Marco Elver <[email protected]> wrote:
>
> > + * Hardware Tag-Based KASAN instead relies on:
> > + * For #1: Resetting tags via kasan_reset_tag().
> > + * For #2: Supression of tag checks via CPU, see report_suppress_start/end().
>
> Typo: "Suppression"
Will fix in v2.
> > +static void report_suppress_start(void)
> > +{
> > +#ifdef CONFIG_KASAN_HW_TAGS
> > + /*
> > + * Disable migration for the duration of printing a KASAN report, as
> > + * hw_suppress_tag_checks_start() disables checks on the current CPU.
> > + */
> > + migrate_disable();
>
> This still allows this task to be preempted by another task. If the
> other task is scheduled in right after hw_suppress_tag_checks_start()
> then there won't be any tag checking in that task. If HW-tags KASAN is
> used as a mitigation technique, that may unnecessarily weaken KASAN,
> because right after report_suppress_start(), it does
> spin_lock_irqsave() which disables interrupts (and thereby preemption)
> anyway.
>
> Why not just use preempt_disable()?
Ah, yes, I intended to do that but forgot to make the change.
I'll wait for comments from arm64 maintainers on the other patches and
then send v2 with a fix.
Thank you, Marco!
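For reference, a hedged sketch of what the v2 suppression helpers could
look like with Marco's preempt_disable() suggestion applied. This is
hypothetical until v2 is actually posted; only the surrounding structure
comes from the patch above:

/* Hypothetical v2 of the patch 5 helpers, per the review above. */
static void report_suppress_start(void)
{
#ifdef CONFIG_KASAN_HW_TAGS
        /*
         * Disable preemption, not just migration: a task preempting us
         * after hw_suppress_tag_checks_start() would otherwise run with
         * tag checks suppressed. This is effectively free, since
         * start_report() takes spin_lock_irqsave() right after this,
         * disabling interrupts (and thereby preemption) anyway.
         */
        preempt_disable();
        hw_suppress_tag_checks_start();
#else
        kasan_disable_current();
#endif
}

static void report_suppress_stop(void)
{
#ifdef CONFIG_KASAN_HW_TAGS
        hw_suppress_tag_checks_stop();
        preempt_enable();
#else
        kasan_enable_current();
#endif
}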