2021-02-11 16:45:37

by Vincenzo Frascino

Subject: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

load_unaligned_zeropad() and __get/put_kernel_nofault() functions can
read past some buffer limits, which may include an MTE granule with a
different tag.

When MTE async mode is enabled, if the load operation crosses the
boundaries and the next granule has a different tag, the PE sets the
TFSR_EL1.TF1 bit as if an asynchronous tag fault had happened.

Enable Tag Check Override (TCO) in these functions before the load and
disable it afterwards to prevent this from happening.

Note: The same condition can be hit in MTE sync mode, but there we deal
with it through the exception handling.
In the current implementation, the mte_async_mode flag is set only at
boot time, but in future kasan might acquire some runtime features that
change the mode dynamically; hence we disable it when sync mode is
selected, to be future-proof.

Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Reported-by: Branislav Rankov <[email protected]>
Tested-by: Branislav Rankov <[email protected]>
Signed-off-by: Vincenzo Frascino <[email protected]>
---
arch/arm64/include/asm/uaccess.h | 24 ++++++++++++++++++++++++
arch/arm64/include/asm/word-at-a-time.h | 4 ++++
arch/arm64/kernel/mte.c | 16 ++++++++++++++++
3 files changed, 44 insertions(+)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 0deb88467111..a857f8f82aeb 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -188,6 +188,26 @@ static inline void __uaccess_enable_tco(void)
 				 ARM64_MTE, CONFIG_KASAN_HW_TAGS));
 }
 
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_mode);
+
+/*
+ * These functions disable tag checking only if in MTE async mode
+ * since the sync mode generates exceptions synchronously and the
+ * nofault or load_unaligned_zeropad can handle them.
+ */
+static inline void __uaccess_disable_tco_async(void)
+{
+	if (static_branch_unlikely(&mte_async_mode))
+		__uaccess_disable_tco();
+}
+
+static inline void __uaccess_enable_tco_async(void)
+{
+	if (static_branch_unlikely(&mte_async_mode))
+		__uaccess_enable_tco();
+}
+
 static inline void uaccess_disable_privileged(void)
 {
 	__uaccess_disable_tco();
@@ -307,8 +327,10 @@ do { \
 do {									\
 	int __gkn_err = 0;						\
 									\
+	__uaccess_enable_tco_async();					\
 	__raw_get_mem("ldr", *((type *)(dst)),				\
 		      (__force type *)(src), __gkn_err);		\
+	__uaccess_disable_tco_async();					\
 	if (unlikely(__gkn_err))					\
 		goto err_label;						\
 } while (0)
@@ -380,8 +402,10 @@ do { \
 do {									\
 	int __pkn_err = 0;						\
 									\
+	__uaccess_enable_tco_async();					\
 	__raw_put_mem("str", *((type *)(src)),				\
 		      (__force type *)(dst), __pkn_err);		\
+	__uaccess_disable_tco_async();					\
 	if (unlikely(__pkn_err))					\
 		goto err_label;						\
 } while(0)
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 3333950b5909..c62d9fa791aa 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -55,6 +55,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 {
 	unsigned long ret, offset;
 
+	__uaccess_enable_tco_async();
+
 	/* Load word from unaligned pointer addr */
 	asm(
 	"1:	ldr	%0, %3\n"
@@ -76,6 +78,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
: "=&r" (ret), "=&r" (offset)
: "r" (addr), "Q" (*(unsigned long *)addr));

+ __uaccess_disable_tco_async();
+
return ret;
}

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 706b7ab75f31..65ecb86dd886 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -26,6 +26,10 @@ u64 gcr_kernel_excl __ro_after_init;

 static bool report_fault_once = true;
 
+/* Whether the MTE asynchronous mode is enabled. */
+DEFINE_STATIC_KEY_FALSE(mte_async_mode);
+EXPORT_SYMBOL_GPL(mte_async_mode);
+
 static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
 {
 	pte_t old_pte = READ_ONCE(*ptep);
@@ -119,12 +123,24 @@ static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
 void mte_enable_kernel_sync(void)
 {
 	__mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
+
+	/*
+	 * This function is called on each active smp core at boot
+	 * time, hence we do not need to take cpu_hotplug_lock again.
+	 */
+	static_branch_disable_cpuslocked(&mte_async_mode);
 }
 EXPORT_SYMBOL_GPL(mte_enable_kernel_sync);
 
 void mte_enable_kernel_async(void)
 {
 	__mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
+
+	/*
+	 * This function is called on each active smp core at boot
+	 * time, hence we do not need to take cpu_hotplug_lock again.
+	 */
+	static_branch_enable_cpuslocked(&mte_async_mode);
 }
 EXPORT_SYMBOL_GPL(mte_enable_kernel_async);

--
2.30.0


2021-02-12 17:25:50

by Catalin Marinas

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

On Thu, Feb 11, 2021 at 03:33:50PM +0000, Vincenzo Frascino wrote:
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 706b7ab75f31..65ecb86dd886 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -26,6 +26,10 @@ u64 gcr_kernel_excl __ro_after_init;
>
> static bool report_fault_once = true;
>
> +/* Whether the MTE asynchronous mode is enabled. */
> +DEFINE_STATIC_KEY_FALSE(mte_async_mode);
> +EXPORT_SYMBOL_GPL(mte_async_mode);
> +
> static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
> {
> pte_t old_pte = READ_ONCE(*ptep);
> @@ -119,12 +123,24 @@ static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
> void mte_enable_kernel_sync(void)
> {
> __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
> +
> + /*
> + * This function is called on each active smp core at boot
> + * time, hence we do not need to take cpu_hotplug_lock again.
> + */
> + static_branch_disable_cpuslocked(&mte_async_mode);
> }
> EXPORT_SYMBOL_GPL(mte_enable_kernel_sync);
>
> void mte_enable_kernel_async(void)
> {
> __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
> +
> + /*
> + * This function is called on each active smp core at boot
> + * time, hence we do not need to take cpu_hotplug_lock again.
> + */
> + static_branch_enable_cpuslocked(&mte_async_mode);
> }

Sorry, I missed the cpuslocked aspect before. Is there any reason you
need to use this API here? I suggested adding it to
mte_enable_kernel_sync() because kasan may at some point do this
dynamically at run-time, so the boot-time argument doesn't hold. But
it's also incorrect as this function will be called for hot-plugged
CPUs as well after boot.

The only reason for static_branch_*_cpuslocked() is if it's called from
a region that already invoked cpus_read_lock() which I don't think is
the case here.
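
For illustration, the two call patterns look roughly like this (untested
sketch, "example_key" is a made-up name, not anything in this series):

#include <linux/cpu.h>
#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(example_key);

static void toggle_from_normal_context(void)
{
	/* static_key_enable() takes cpus_read_lock() internally. */
	static_branch_enable(&example_key);
}

static void toggle_with_hotplug_lock_held(void)
{
	cpus_read_lock();
	/* The lock is already held, so the _cpuslocked variant is required. */
	static_branch_enable_cpuslocked(&example_key);
	cpus_read_unlock();
}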

--
Catalin

2021-02-22 12:08:14

by Vincenzo Frascino

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits



On 2/12/21 5:21 PM, Catalin Marinas wrote:
>> +
>> + /*
>> + * This function is called on each active smp core at boot
>> + * time, hence we do not need to take cpu_hotplug_lock again.
>> + */
>> + static_branch_enable_cpuslocked(&mte_async_mode);
>> }
> Sorry, I missed the cpuslocked aspect before. Is there any reason you
> need to use this API here? I suggested to add it to the
> mte_enable_kernel_sync() because kasan may at some point do this
> dynamically at run-time, so the boot-time argument doesn't hold. But
> it's also incorrect as this function will be called for hot-plugged
> CPUs as well after boot.
>
> The only reason for static_branch_*_cpuslocked() is if it's called from
> a region that already invoked cpus_read_lock() which I don't think is
> the case here.

I agree with your analysis on why static_branch_*_cpuslocked() is needed; in
fact, cpus_read_lock() takes cpu_hotplug_lock, as per the comment on top of
that line of code.

If I try to take that lock when enabling the secondary cores, I end up in the
situation below:

[ 0.283402] smp: Bringing up secondary CPUs ...
....
[ 5.890963] Call trace:
[ 5.891050] dump_backtrace+0x0/0x19c
[ 5.891212] show_stack+0x18/0x70
[ 5.891373] dump_stack+0xd0/0x12c
[ 5.891531] dequeue_task_idle+0x28/0x40
[ 5.891686] __schedule+0x45c/0x6c0
[ 5.891851] schedule+0x70/0x104
[ 5.892010] percpu_rwsem_wait+0xe8/0x104
[ 5.892174] __percpu_down_read+0x5c/0x90
[ 5.892332] percpu_down_read.constprop.0+0xbc/0xd4
[ 5.892497] cpus_read_lock+0x10/0x1c
[ 5.892660] static_key_enable+0x18/0x3c
[ 5.892823] mte_enable_kernel_async+0x40/0x70
[ 5.892988] kasan_init_hw_tags_cpu+0x50/0x60
[ 5.893144] cpu_enable_mte+0x24/0x70
[ 5.893304] verify_local_cpu_caps+0x58/0x120
[ 5.893465] check_local_cpu_capabilities+0x18/0x1f0
[ 5.893626] secondary_start_kernel+0xe0/0x190
[ 5.893790] 0x0
[ 5.893975] bad: scheduling from the idle thread!
[ 5.894065] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W
5.11.0-rc7-10587-g22cd50bcfcf-dirty #6

and the kernel panics.

Note: it looks like some messages were dropped in between enabling the
secondaries and the first clean stack trace.

--
Regards,
Vincenzo

2021-02-22 18:01:42

by Catalin Marinas

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

On Mon, Feb 22, 2021 at 12:08:07PM +0000, Vincenzo Frascino wrote:
> On 2/12/21 5:21 PM, Catalin Marinas wrote:
> >> +
> >> + /*
> >> + * This function is called on each active smp core at boot
> >> + * time, hence we do not need to take cpu_hotplug_lock again.
> >> + */
> >> + static_branch_enable_cpuslocked(&mte_async_mode);
> >> }
> > Sorry, I missed the cpuslocked aspect before. Is there any reason you
> > need to use this API here? I suggested to add it to the
> > mte_enable_kernel_sync() because kasan may at some point do this
> > dynamically at run-time, so the boot-time argument doesn't hold. But
> > it's also incorrect as this function will be called for hot-plugged
> > CPUs as well after boot.
> >
> > The only reason for static_branch_*_cpuslocked() is if it's called from
> > a region that already invoked cpus_read_lock() which I don't think is
> > the case here.
>
> I agree with your analysis on why static_branch_*_cpuslocked() is needed, in
> fact cpus_read_lock() takes cpu_hotplug_lock as per comment on top of the line
> of code.
>
> If I try to take that lock when enabling the secondary cores I end up in the
> situation below:
>
> [ 0.283402] smp: Bringing up secondary CPUs ...
> ....
> [ 5.890963] Call trace:
> [ 5.891050] dump_backtrace+0x0/0x19c
> [ 5.891212] show_stack+0x18/0x70
> [ 5.891373] dump_stack+0xd0/0x12c
> [ 5.891531] dequeue_task_idle+0x28/0x40
> [ 5.891686] __schedule+0x45c/0x6c0
> [ 5.891851] schedule+0x70/0x104
> [ 5.892010] percpu_rwsem_wait+0xe8/0x104
> [ 5.892174] __percpu_down_read+0x5c/0x90
> [ 5.892332] percpu_down_read.constprop.0+0xbc/0xd4
> [ 5.892497] cpus_read_lock+0x10/0x1c
> [ 5.892660] static_key_enable+0x18/0x3c
> [ 5.892823] mte_enable_kernel_async+0x40/0x70
> [ 5.892988] kasan_init_hw_tags_cpu+0x50/0x60
> [ 5.893144] cpu_enable_mte+0x24/0x70
> [ 5.893304] verify_local_cpu_caps+0x58/0x120
> [ 5.893465] check_local_cpu_capabilities+0x18/0x1f0
> [ 5.893626] secondary_start_kernel+0xe0/0x190
> [ 5.893790] 0x0
> [ 5.893975] bad: scheduling from the idle thread!
> [ 5.894065] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W
> 5.11.0-rc7-10587-g22cd50bcfcf-dirty #6
>
> and the kernel panics.

That's because cpu_hotplug_lock is not a spinlock but a semaphore, which
implies sleeping. I don't think avoiding taking this semaphore
altogether (as in the *_cpuslocked functions) is the correct workaround.

The mte_enable_kernel_async() function is called on each secondary CPU
but we don't really need to attempt to toggle the static key on each of
them as they all have the same configuration. Maybe do something like:

	if (!static_branch_unlikely(&mte_async_mode))
		static_branch_enable(&mte_async_mode);

so that it's only set on the boot CPU.

The alternative is to use a per-CPU mask/variable instead of static
branches but it's probably too expensive for those functions that were
meant to improve performance.

We'll still have an issue with dynamically switching the async/sync mode
at run-time. Luckily kasan doesn't do this now. The problem is that
until the last CPU has been switched from async to sync, we can't
toggle the static label. When switching from sync to async, we need
to do it on the first CPU being switched.

So, I think currently we can set the mte_async_mode label to true in
mte_enable_kernel_async(), with the 'if' check above. For
mte_enable_kernel_sync(), don't bother with setting the key to false but
place a WARN_ONCE if the mte_async_mode is true. We can revisit it if
kasan ever gains this run-time switch mode.
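
Putting the two together, a rough (untested) sketch of what I mean:

void mte_enable_kernel_async(void)
{
	__mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);

	/*
	 * The async key is set system-wide by the first CPU that gets
	 * here, so secondary CPUs never sleep on cpu_hotplug_lock in
	 * static_key_enable() during bring-up.
	 */
	if (!static_branch_unlikely(&mte_async_mode))
		static_branch_enable(&mte_async_mode);
}

void mte_enable_kernel_sync(void)
{
	/*
	 * Catch a sync enable after async has already been set
	 * system-wide; revisit if kasan ever switches modes at run-time.
	 */
	WARN_ONCE(static_branch_unlikely(&mte_async_mode),
		  "MTE async mode enabled system wide!");

	__mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
}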

--
Catalin

2021-02-23 12:12:47

by Catalin Marinas

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

On Tue, Feb 23, 2021 at 10:56:46AM +0000, Vincenzo Frascino wrote:
> On 2/22/21 5:58 PM, Catalin Marinas wrote:
> > We'll still have an issue with dynamically switching the async/sync mode
> > at run-time. Luckily kasan doesn't do this now. The problem is that
> > until the last CPU have been switched from async to sync, we can't
> > toggle the static label. When switching from sync to async, we need
> > to do it on the first CPU being switched.
>
> I totally agree on this point. In the case of runtime switching we might need
> the rethink completely the strategy and depends a lot on what we want to allow
> and what not. For the kernel I imagine we will need to expose something in sysfs
> that affects all the cores and then maybe stop_machine() to propagate it to all
> the cores. Do you think having some of the cores running in sync mode and some
> in async is a viable solution?

stop_machine() is an option indeed. I think it's still possible to run
some cores in async while others in sync but the static key here would
only be toggled when no async CPUs are left.
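
Purely speculative, but a run-time switch could then look roughly like
this (mte_switch_mode*() are made-up names, not an actual proposal):

#include <linux/stop_machine.h>

/* Runs on every online CPU with interrupts disabled. */
static int mte_switch_mode_fn(void *arg)
{
	unsigned long tcf = (unsigned long)arg;

	__mte_enable_kernel(tcf == SCTLR_ELx_TCF_ASYNC ?
			    "asynchronous" : "synchronous", tcf);
	return 0;
}

static int mte_switch_mode(unsigned long tcf)
{
	int ret;

	/* Going async: the key must be on before any CPU runs async. */
	if (tcf == SCTLR_ELx_TCF_ASYNC)
		static_branch_enable(&mte_async_mode);

	ret = stop_machine(mte_switch_mode_fn, (void *)tcf, cpu_online_mask);

	/* Going sync: clear the key only once no async CPUs are left. */
	if (!ret && tcf == SCTLR_ELx_TCF_SYNC)
		static_branch_disable(&mte_async_mode);

	return ret;
}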

> Probably it is worth to discuss it further once we cross that bridge.

Yes. For now, a warning should do so that we don't forget.

--
Catalin

2021-02-23 12:53:05

by Will Deacon

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

On Tue, Feb 23, 2021 at 12:05:32PM +0000, Catalin Marinas wrote:
> On Tue, Feb 23, 2021 at 10:56:46AM +0000, Vincenzo Frascino wrote:
> > On 2/22/21 5:58 PM, Catalin Marinas wrote:
> > > We'll still have an issue with dynamically switching the async/sync mode
> > > at run-time. Luckily kasan doesn't do this now. The problem is that
> > > until the last CPU have been switched from async to sync, we can't
> > > toggle the static label. When switching from sync to async, we need
> > > to do it on the first CPU being switched.
> >
> > I totally agree on this point. In the case of runtime switching we might need
> > the rethink completely the strategy and depends a lot on what we want to allow
> > and what not. For the kernel I imagine we will need to expose something in sysfs
> > that affects all the cores and then maybe stop_machine() to propagate it to all
> > the cores. Do you think having some of the cores running in sync mode and some
> > in async is a viable solution?
>
> stop_machine() is an option indeed. I think it's still possible to run
> some cores in async while others in sync but the static key here would
> only be toggled when no async CPUs are left.

Just as a general point, but if we expose stop_machine() via sysfs we
probably want to limit that to privileged users so you can't DoS the system
by spamming into the file.
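
E.g. a write-only, root-only attribute plus an explicit capability
check; something like the following (hypothetical names, just to
illustrate the point):

#include <linux/capability.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t mte_mode_store(struct kobject *kobj,
			      struct kobj_attribute *attr,
			      const char *buf, size_t count)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* ... parse "sync"/"async" and kick off the expensive switch ... */
	return count;
}

/* 0200: writable by root only, not readable or writable by anyone else. */
static struct kobj_attribute mte_mode_attr =
	__ATTR(mte_mode, 0200, NULL, mte_mode_store);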

Will

2021-02-23 13:19:10

by Catalin Marinas

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

On Tue, Feb 23, 2021 at 12:49:52PM +0000, Will Deacon wrote:
> On Tue, Feb 23, 2021 at 12:05:32PM +0000, Catalin Marinas wrote:
> > On Tue, Feb 23, 2021 at 10:56:46AM +0000, Vincenzo Frascino wrote:
> > > On 2/22/21 5:58 PM, Catalin Marinas wrote:
> > > > We'll still have an issue with dynamically switching the async/sync mode
> > > > at run-time. Luckily kasan doesn't do this now. The problem is that
> > > > until the last CPU have been switched from async to sync, we can't
> > > > toggle the static label. When switching from sync to async, we need
> > > > to do it on the first CPU being switched.
> > >
> > > I totally agree on this point. In the case of runtime switching we might need
> > > the rethink completely the strategy and depends a lot on what we want to allow
> > > and what not. For the kernel I imagine we will need to expose something in sysfs
> > > that affects all the cores and then maybe stop_machine() to propagate it to all
> > > the cores. Do you think having some of the cores running in sync mode and some
> > > in async is a viable solution?
> >
> > stop_machine() is an option indeed. I think it's still possible to run
> > some cores in async while others in sync but the static key here would
> > only be toggled when no async CPUs are left.
>
> Just as a general point, but if we expose stop_machine() via sysfs we
> probably want to limit that to privileged users so you can't DoS the system
> by spamming into the file.

Definitely. Anyway, that's a later kasan feature if they'd find it
useful. Currently the mode is set at boot from cmdline.

--
Catalin

2021-02-23 14:10:35

by Vincenzo Frascino

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits



On 2/22/21 5:58 PM, Catalin Marinas wrote:
> That's because cpu_hotplug_lock is not a spinlock but a semaphore which
> implies sleeping. I don't think avoiding taking this semaphore
> altogether (as in the *_cpuslocked functions) is the correct workaround.
>

Thinking about it a second time, I agree: it is not a good idea to avoid taking
the semaphore in this case.

> The mte_enable_kernel_async() function is called on each secondary CPU
> but we don't really need to attempt to toggle the static key on each of
> them as they all have the same configuration. Maybe do something like:
>
> if (!static_branch_unlikely(&mte_async_mode)))
> static_branch_enable(&mte_async_mode);
>
> so that it's only set on the boot CPU.
>

This should work, maybe with a comment that if we plan to introduce runtime
switching between async and sync in the future, we need to revisit our strategy.

> The alternative is to use a per-CPU mask/variable instead of static
> branches but it's probably too expensive for those functions that were
> meant to improve performance.
>

I would not go for this approach because the reason why we introduced static
branches, instead of having a normal variable saving the state, was performance.

> We'll still have an issue with dynamically switching the async/sync mode
> at run-time. Luckily kasan doesn't do this now. The problem is that
> until the last CPU have been switched from async to sync, we can't
> toggle the static label. When switching from sync to async, we need
> to do it on the first CPU being switched.
>

I totally agree on this point. In the case of runtime switching we might need
to rethink the strategy completely; it depends a lot on what we want to allow
and what not. For the kernel I imagine we will need to expose something in
sysfs that affects all the cores, and then maybe stop_machine() to propagate it
to all the cores. Do you think having some of the cores running in sync mode
and some in async is a viable solution?

Probably it is worth discussing this further once we cross that bridge.

> So, I think currently we can set the mte_async_mode label to true in
> mte_enable_kernel_async(), with the 'if' check above. For
> mte_enable_kernel_sync(), don't bother with setting the key to false but
> place a WARN_ONCE if the mte_async_mode is true. We can revisit it if
> kasan ever gains this run-time switch mode.

Indeed, this should work for now.

--
Regards,
Vincenzo

2021-02-23 14:23:34

by Vincenzo Frascino

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits


On 2/23/21 12:05 PM, Catalin Marinas wrote:
>> I totally agree on this point. In the case of runtime switching we might need
>> the rethink completely the strategy and depends a lot on what we want to allow
>> and what not. For the kernel I imagine we will need to expose something in sysfs
>> that affects all the cores and then maybe stop_machine() to propagate it to all
>> the cores. Do you think having some of the cores running in sync mode and some
>> in async is a viable solution?
> stop_machine() is an option indeed. I think it's still possible to run
> some cores in async while others in sync but the static key here would
> only be toggled when no async CPUs are left.
>

In such a case we might need to track the state based on cpuid() and have a
mask that tells us when the cpus are all sync.
Not as expensive as stop_machine(), but in my opinion it still requires a valid
use case before being introduced.
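
Roughly what I have in mind (hypothetical sketch, names made up):

#include <linux/cpumask.h>

/* Set of CPUs currently running kernel tag checking in sync mode. */
static cpumask_t mte_sync_cpus;

static void mte_track_cpu_sync(int cpu)
{
	cpumask_set_cpu(cpu, &mte_sync_cpus);

	/* Every online CPU is sync: the async key can be turned off. */
	if (cpumask_subset(cpu_online_mask, &mte_sync_cpus))
		static_branch_disable(&mte_async_mode);
}

static void mte_track_cpu_async(int cpu)
{
	cpumask_clear_cpu(cpu, &mte_sync_cpus);

	/* At least one CPU is async: the key must stay on. */
	if (!static_branch_unlikely(&mte_async_mode))
		static_branch_enable(&mte_async_mode);
}

Both static_branch_enable() and static_branch_disable() can sleep, so this
bookkeeping would have to run from a context where that is allowed.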

--
Regards,
Vincenzo

2021-02-23 14:48:29

by Vincenzo Frascino

Subject: Re: [PATCH v13 4/7] arm64: mte: Enable TCO in functions that can read beyond buffer limits

On 2/23/21 12:49 PM, Will Deacon wrote:
>>> I totally agree on this point. In the case of runtime switching we might need
>>> the rethink completely the strategy and depends a lot on what we want to allow
>>> and what not. For the kernel I imagine we will need to expose something in sysfs
>>> that affects all the cores and then maybe stop_machine() to propagate it to all
>>> the cores. Do you think having some of the cores running in sync mode and some
>>> in async is a viable solution?
>> stop_machine() is an option indeed. I think it's still possible to run
>> some cores in async while others in sync but the static key here would
>> only be toggled when no async CPUs are left.
> Just as a general point, but if we expose stop_machine() via sysfs we
> probably want to limit that to privileged users so you can't DoS the system
> by spamming into the file.

I agree; if we ever introduce runtime switching and go for this option, we
should make sure that we do it safely.

--
Regards,
Vincenzo