2016-12-07 20:01:00

by Christopher Covington

[permalink] [raw]
Subject: [PATCH 1/3] arm64: Define Qualcomm Technologies Falkor v1 CPU

From: Shanker Donthineni <[email protected]>

This patch adds the cputype info for Qualcomm Technologies ARMv8 CPU
implementer ID 0x51 and part number for Falkor v1 in cputype.h.

Signed-off-by: Shanker Donthineni <[email protected]>
Signed-off-by: Christopher Covington <[email protected]>
---
arch/arm64/include/asm/cputype.h | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 26a68dd..ee60561 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -71,6 +71,7 @@
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
+#define ARM_CPU_IMP_QCOM 0x51

#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -84,10 +85,13 @@

#define BRCM_CPU_PART_VULCAN 0x516

+#define QCOM_CPU_PART_FALKOR_V1 0x800
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)

#ifndef __ASSEMBLY__

--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project.


2016-12-07 20:01:16

by Christopher Covington

[permalink] [raw]
Subject: [PATCH 2/3] arm64: Work around Falkor erratum 1003

From: Shanker Donthineni <[email protected]>

On the Qualcomm Datacenter Technologies Falkor v1 CPU, memory accesses may
allocate TLB entries using an incorrect ASID when TTBRx_EL1 is being
updated. Changing the TTBRx_EL1[ASID] and TTBRx_EL1[BADDR] fields
separately using a reserved ASID will ensure that there are no TLB entries
with incorrect ASID after changing the ASID.

Pseudo code:
write TTBRx_EL1[ASID] to a reserved value
ISB
write TTBRx_EL1[BADDR] to a desired value
ISB
write TTBRx_EL1[ASID] to a desired value
ISB

Signed-off-by: Shanker Donthineni <[email protected]>
Signed-off-by: Christopher Covington <[email protected]>
---
arch/arm64/Kconfig | 11 +++++++++++
arch/arm64/include/asm/cpucaps.h | 3 ++-
arch/arm64/kernel/cpu_errata.c | 7 +++++++
arch/arm64/mm/context.c | 10 ++++++++++
arch/arm64/mm/proc.S | 21 +++++++++++++++++++++
5 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 969ef88..1004a3d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -474,6 +474,17 @@ config CAVIUM_ERRATUM_27456

If unsure, say Y.

+config QCOM_FALKOR_ERRATUM_E1003
+ bool "Falkor E1003: Incorrect translation due to ASID change"
+ default y
+ help
+ An incorrect TLB entry may be created while changing the ASID
+ and the translation table address together for TTBR0_EL1. The
+ workaround for this issue is to use a reserved ASID in
+ cpu_do_switch_mm() before switching to the target ASID.
+
+ If unsure, say Y.
+
endmenu


diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 87b4465..cb6a8c2 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -34,7 +34,8 @@
#define ARM64_HAS_32BIT_EL0 13
#define ARM64_HYP_OFFSET_LOW 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
+#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 16

-#define ARM64_NCAPS 16
+#define ARM64_NCAPS 17

#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b75e917..3789e2f 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -130,6 +130,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.def_scope = SCOPE_LOCAL_CPU,
.enable = cpu_enable_trap_ctr_access,
},
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1003
+ {
+ .desc = "Qualcomm Falkor erratum E1003",
+ .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
+ MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0x00, 0x00),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index efcf1f7..f8d94ff 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -87,6 +87,11 @@ static void flush_context(unsigned int cpu)
/* Update the list of reserved ASIDs and the ASID bitmap. */
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);

+ /* Reserve ASID '1' for Falkor erratum E1003 */
+ if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_E1003) &&
+ cpus_have_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
+ __set_bit(1, asid_map);
+
/*
* Ensure the generation bump is observed before we xchg the
* active_asids.
@@ -239,6 +244,11 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);

+ /* Reserve ASID '1' for Falkor erratum E1003 */
+ if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_E1003) &&
+ cpus_have_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
+ __set_bit(1, asid_map);
+
pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 352c73b..b4d6508 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -134,6 +134,27 @@ ENDPROC(cpu_do_resume)
ENTRY(cpu_do_switch_mm)
mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1003
+alternative_if_not ARM64_WORKAROUND_QCOM_FALKOR_E1003
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+alternative_else
+ mrs x2, ttbr0_el1 // get current TTBR0_EL1
+ mov x3, #1 // reserved ASID
+ bfi x2, x3, #48, #16 // set the reserved ASID + old BADDR
+ msr ttbr0_el1, x2 // update TTBR0_EL1
+ isb
+ bfi x2, x0, #0, #48 // set the desired BADDR + reserved ASID
+ msr ttbr0_el1, x2 // update TTBR0_EL1
+ isb
+alternative_endif
+#endif
msr ttbr0_el1, x0 // set TTBR0
isb
alternative_if ARM64_WORKAROUND_CAVIUM_27456
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project.

2016-12-07 20:04:53

by Christopher Covington

[permalink] [raw]
Subject: [PATCH] arm64: Work around Falkor erratum 1009

From: Shanker Donthineni <[email protected]>

During a TLB invalidate sequence targeting the inner shareable
domain, Falkor may prematurely complete the DSB before all loads
and stores using the old translation are observed; instruction
fetches are not subject to the conditions of this erratum.

Signed-off-by: Shanker Donthineni <[email protected]>
Signed-off-by: Christopher Covington <[email protected]>
---
arch/arm64/Kconfig | 10 +++++++++
arch/arm64/include/asm/cpucaps.h | 3 ++-
arch/arm64/include/asm/tlbflush.h | 43 +++++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/cpu_errata.c | 7 +++++++
arch/arm64/kvm/hyp/tlb.c | 39 ++++++++++++++++++++++++++++++-----
5 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1004a3d..125440f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -485,6 +485,16 @@ config QCOM_FALKOR_ERRATUM_E1003

If unsure, say Y.

+config QCOM_FALKOR_ERRATUM_E1009
+ bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
+ default y
+ help
+ Falkor CPU may prematurely complete a DSB following a TLBI xxIS
+ invalidate maintenance operation. Repeat the TLBI operation one
+ more time to fix the issue.
+
+ If unsure, say Y.
+
endmenu


diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index cb6a8c2..5357d7f 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -35,7 +35,8 @@
#define ARM64_HYP_OFFSET_LOW 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 16
+#define ARM64_WORKAROUND_QCOM_FALKOR_E1009 17

-#define ARM64_NCAPS 17
+#define ARM64_NCAPS 18

#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index deab523..03bafc5 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -23,6 +23,7 @@

#include <linux/sched.h>
#include <asm/cputype.h>
+#include <asm/alternative.h>

/*
* Raw TLBI operations.
@@ -94,6 +95,13 @@ static inline void flush_tlb_all(void)
dsb(ishst);
__tlbi(vmalle1is);
dsb(ish);
+ asm volatile(ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "tlbi vmalle1is \n"
+ "dsb ish \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : :);
isb();
}

@@ -104,6 +112,13 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ishst);
__tlbi(aside1is, asid);
dsb(ish);
+ asm volatile(ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "tlbi aside1is, %0 \n"
+ "dsb ish \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : "r" (asid));
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -114,6 +129,13 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ishst);
__tlbi(vale1is, addr);
dsb(ish);
+ asm volatile(ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "tlbi vale1is, %0 \n"
+ "dsb ish \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : "r" (addr));
}

/*
@@ -145,6 +167,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
__tlbi(vae1is, addr);
}
dsb(ish);
+ asm volatile(ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "tlbi vae1is, %0 \n"
+ "dsb ish \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : "r" (end));
}

static inline void flush_tlb_range(struct vm_area_struct *vma,
@@ -169,6 +198,13 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
__tlbi(vaae1is, addr);
dsb(ish);
+ asm volatile(ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "tlbi vaae1is, %0 \n"
+ "dsb ish \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : "r" (end));
isb();
}

@@ -183,6 +219,13 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,

__tlbi(vae1is, addr);
dsb(ish);
+ asm volatile(ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "tlbi vae1is, %0 \n"
+ "dsb ish \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : "r" (addr));
}

#endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 3789e2f..8013579 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -137,6 +137,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0x00, 0x00),
},
#endif
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1009
+ {
+ .desc = "Qualcomm Falkor erratum E1009",
+ .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1009,
+ MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0x00, 0x00),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 88e2f2b..dfd3a77 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -16,6 +16,7 @@
*/

#include <asm/kvm_hyp.h>
+#include <asm/alternative.h>

void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
@@ -32,7 +33,14 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
* whole of Stage-1. Weep...
*/
ipa >>= 12;
- asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa));
+ asm volatile("tlbi ipas2e1is, %0 \n"
+ ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "dsb ish \n"
+ "tlbi ipas2e1is, %0 \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : "r" (ipa));

/*
* We have to ensure completion of the invalidation at Stage-2,
@@ -41,7 +49,14 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
* the Stage-1 invalidation happened first.
*/
dsb(ish);
- asm volatile("tlbi vmalle1is" : : );
+ asm volatile("tlbi vmalle1is \n"
+ ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "dsb ish \n"
+ "tlbi vmalle1is \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : );
dsb(ish);
isb();

@@ -57,7 +72,14 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();

- asm volatile("tlbi vmalls12e1is" : : );
+ asm volatile("tlbi vmalls12e1is \n"
+ ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "dsb ish \n"
+ "tlbi vmalls12e1is \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ : : );
dsb(ish);
isb();

@@ -82,7 +104,14 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
void __hyp_text __kvm_flush_vm_context(void)
{
dsb(ishst);
- asm volatile("tlbi alle1is \n"
- "ic ialluis ": : );
+ asm volatile("tlbi alle1is \n"
+ ALTERNATIVE(
+ "nop \n"
+ "nop \n",
+ "dsb ish \n"
+ "tlbi alle1is \n",
+ ARM64_WORKAROUND_QCOM_FALKOR_E1009)
+ "ic ialluis \n"
+ : : );
dsb(ish);
}
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project.

2016-12-08 10:31:26

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH 2/3] arm64: Work around Falkor erratum 1003

On Wed, Dec 07, 2016 at 03:00:26PM -0500, Christopher Covington wrote:
> From: Shanker Donthineni <[email protected]>
>
> On the Qualcomm Datacenter Technologies Falkor v1 CPU, memory accesses may
> allocate TLB entries using an incorrect ASID when TTBRx_EL1 is being
> updated. Changing the TTBRx_EL1[ASID] and TTBRx_EL1[BADDR] fields
> separately using a reserved ASID will ensure that there are no TLB entries
> with incorrect ASID after changing the the ASID.
>
> Pseudo code:
> write TTBRx_EL1[ASID] to a reserved value
> ISB
> write TTBRx_EL1[BADDR] to a desired value
> ISB
> write TTBRx_EL1[ASID] to a desired value
> ISB

While the new ASID probably won't have incorrect TLB entries, the
reserved ASID will have random entries from all over the place. That's
because in step 1 you change the ASID to the reserved one while leaving
the old BADDR in place. There is a brief time before changing the ASID
when speculative page table walks will populate the TLB with entries
tagged with the reserved ASID. Such entries are never removed during TLB
shoot-down for the real ASID, so, depending on how this CPU implements
the walk cache, you could end up with intermediate level entries still
active and pointing to freed/reused pages. It will eventually hit an
entry that looks global with weird consequences.

We've been bitten by this in the past on arm32: 52af9c6cd863 ("ARM:
6943/1: mm: use TTBR1 instead of reserved context ID").

--
Catalin

2016-12-08 11:20:43

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH] arm64: Work around Falkor erratum 1009

On Wed, Dec 07, 2016 at 03:04:31PM -0500, Christopher Covington wrote:
> From: Shanker Donthineni <[email protected]>
>
> During a TLB invalidate sequence targeting the inner shareable
> domain, Falkor may prematurely complete the DSB before all loads
> and stores using the old translation are observed; instruction
> fetches are not subject to the conditions of this erratum.
>
> Signed-off-by: Shanker Donthineni <[email protected]>
> Signed-off-by: Christopher Covington <[email protected]>
> ---
> arch/arm64/Kconfig | 10 +++++++++
> arch/arm64/include/asm/cpucaps.h | 3 ++-
> arch/arm64/include/asm/tlbflush.h | 43 +++++++++++++++++++++++++++++++++++++++
> arch/arm64/kernel/cpu_errata.c | 7 +++++++
> arch/arm64/kvm/hyp/tlb.c | 39 ++++++++++++++++++++++++++++++-----
> 5 files changed, 96 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 1004a3d..125440f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -485,6 +485,16 @@ config QCOM_FALKOR_ERRATUM_E1003
>
> If unsure, say Y.
>
> +config QCOM_FALKOR_ERRATUM_E1009
> + bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
> + default y
> + help
> + Falkor CPU may prematurely complete a DSB following a TLBI xxIS
> + invalidate maintenance operations. Repeat the TLBI operation one
> + more time to fix the issue.
> +
> + If unsure, say Y.

Call me perverse, but I like this workaround. People often tend to screw
up TLBI and DVM sync, but the IPI-based workaround is horribly invasive
and fragile. Simply repeating the operation tends to be enough to make
the chance of failure small enough to be acceptable.

> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index cb6a8c2..5357d7f 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -35,7 +35,8 @@
> #define ARM64_HYP_OFFSET_LOW 14
> #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
> #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 16
> +#define ARM64_WORKAROUND_QCOM_FALKOR_E1009 17

Could you rename this to something like ARM64_WORKAROUND_REPEAT_TLBI, so
that it could potentially be used by others?

>
> -#define ARM64_NCAPS 17
> +#define ARM64_NCAPS 18
>
> #endif /* __ASM_CPUCAPS_H */
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index deab523..03bafc5 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -23,6 +23,7 @@
>
> #include <linux/sched.h>
> #include <asm/cputype.h>
> +#include <asm/alternative.h>
>
> /*
> * Raw TLBI operations.
> @@ -94,6 +95,13 @@ static inline void flush_tlb_all(void)
> dsb(ishst);
> __tlbi(vmalle1is);
> dsb(ish);
> + asm volatile(ALTERNATIVE(
> + "nop \n"
> + "nop \n",
> + "tlbi vmalle1is \n"
> + "dsb ish \n",
> + ARM64_WORKAROUND_QCOM_FALKOR_E1009)
> + : :);

I'd much rather this was part of the __tlbi macro, which would hopefully
restrict this to one place in the code.

Will

2016-12-08 11:35:39

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH] arm64: Work around Falkor erratum 1009

On 08/12/16 11:20, Will Deacon wrote:
> On Wed, Dec 07, 2016 at 03:04:31PM -0500, Christopher Covington wrote:
>> From: Shanker Donthineni <[email protected]>
>>
>> During a TLB invalidate sequence targeting the inner shareable
>> domain, Falkor may prematurely complete the DSB before all loads
>> and stores using the old translation are observed; instruction
>> fetches are not subject to the conditions of this erratum.
>>
>> Signed-off-by: Shanker Donthineni <[email protected]>
>> Signed-off-by: Christopher Covington <[email protected]>
>> ---
>> arch/arm64/Kconfig | 10 +++++++++
>> arch/arm64/include/asm/cpucaps.h | 3 ++-
>> arch/arm64/include/asm/tlbflush.h | 43 +++++++++++++++++++++++++++++++++++++++
>> arch/arm64/kernel/cpu_errata.c | 7 +++++++
>> arch/arm64/kvm/hyp/tlb.c | 39 ++++++++++++++++++++++++++++++-----
>> 5 files changed, 96 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 1004a3d..125440f 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -485,6 +485,16 @@ config QCOM_FALKOR_ERRATUM_E1003
>>
>> If unsure, say Y.
>>
>> +config QCOM_FALKOR_ERRATUM_E1009
>> + bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
>> + default y
>> + help
>> + Falkor CPU may prematurely complete a DSB following a TLBI xxIS
>> + invalidate maintenance operations. Repeat the TLBI operation one
>> + more time to fix the issue.
>> +
>> + If unsure, say Y.
>
> Call me perverse, but I like this workaround. People often tend to screw
> up TLBI and DVM sync, but the IPI-based workaround is horribly invasive
> and fragile. Simply repeating the operation tends to be enough to make
> the chance of failure small enough to be acceptable.
>
>> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
>> index cb6a8c2..5357d7f 100644
>> --- a/arch/arm64/include/asm/cpucaps.h
>> +++ b/arch/arm64/include/asm/cpucaps.h
>> @@ -35,7 +35,8 @@
>> #define ARM64_HYP_OFFSET_LOW 14
>> #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
>> #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 16
>> +#define ARM64_WORKAROUND_QCOM_FALKOR_E1009 17
>
> Could you rename this to something like ARM64_WORKAROUND_REPEAT_TLBI, so
> that it could potentially be used by others?

And add a parameter to it so that we can generate multiple TLBIs,
depending on the level of brokenness? ;-)

M.
--
Jazz is not dead. It just smells funny...

2016-12-08 11:38:50

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 2/3] arm64: Work around Falkor erratum 1003

On Wed, Dec 07, 2016 at 03:00:26PM -0500, Christopher Covington wrote:
> From: Shanker Donthineni <[email protected]>
>
> On the Qualcomm Datacenter Technologies Falkor v1 CPU, memory accesses may
> allocate TLB entries using an incorrect ASID when TTBRx_EL1 is being
> updated. Changing the TTBRx_EL1[ASID] and TTBRx_EL1[BADDR] fields
> separately using a reserved ASID will ensure that there are no TLB entries
> with incorrect ASID after changing the the ASID.
>
> Pseudo code:
> write TTBRx_EL1[ASID] to a reserved value
> ISB
> write TTBRx_EL1[BADDR] to a desired value
> ISB
> write TTBRx_EL1[ASID] to a desired value
> ISB
>
> Signed-off-by: Shanker Donthineni <[email protected]>
> Signed-off-by: Christopher Covington <[email protected]>
> ---
> arch/arm64/Kconfig | 11 +++++++++++
> arch/arm64/include/asm/cpucaps.h | 3 ++-
> arch/arm64/kernel/cpu_errata.c | 7 +++++++
> arch/arm64/mm/context.c | 10 ++++++++++
> arch/arm64/mm/proc.S | 21 +++++++++++++++++++++
> 5 files changed, 51 insertions(+), 1 deletion(-)

This needs an update to Documentation/arm64/silicon-errata.txt.

> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> index efcf1f7..f8d94ff 100644
> --- a/arch/arm64/mm/context.c
> +++ b/arch/arm64/mm/context.c
> @@ -87,6 +87,11 @@ static void flush_context(unsigned int cpu)
> /* Update the list of reserved ASIDs and the ASID bitmap. */
> bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
>
> + /* Reserve ASID '1' for Falkor erratum E1003 */
> + if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_E1003) &&
> + cpus_have_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
> + __set_bit(1, asid_map);
> +
> /*
> * Ensure the generation bump is observed before we xchg the
> * active_asids.
> @@ -239,6 +244,11 @@ static int asids_init(void)
> panic("Failed to allocate bitmap for %lu ASIDs\n",
> NUM_USER_ASIDS);
>
> + /* Reserve ASID '1' for Falkor erratum E1003 */
> + if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_E1003) &&
> + cpus_have_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
> + __set_bit(1, asid_map);
> +
> pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
> return 0;
> }
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 352c73b..b4d6508 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -134,6 +134,27 @@ ENDPROC(cpu_do_resume)
> ENTRY(cpu_do_switch_mm)
> mmid x1, x1 // get mm->context.id
> bfi x0, x1, #48, #16 // set the ASID
> +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1003
> +alternative_if_not ARM64_WORKAROUND_QCOM_FALKOR_E1003
> + nop
> + nop
> + nop
> + nop
> + nop
> + nop
> + nop
> + nop
> +alternative_else
> + mrs x2, ttbr0_el1 // get cuurent TTBR0_EL1
> + mov x3, #1 // reserved ASID

It might be best to define a FALKOR_E1003_RESERVED_ASID constant
somewhere, rather than using 1 directly here and in the ASID allocator.

> + bfi x2, x3, #48, #16 // set the reserved ASID + old BADDR
> + msr ttbr0_el1, x2 // update TTBR0_EL1
> + isb
> + bfi x2, x0, #0, #48 // set the desired BADDR + reserved ASID
> + msr ttbr0_el1, x2 // update TTBR0_EL1
> + isb
> +alternative_endif

Please use alternative_if and alternative_else_nop_endif.

As Catalin noted, there are issues with stale and/or conflicting TLB
entries allocated with the reserved ASID, so we likely have to
invalidate that after the final switch.

Thanks,
Mark.

2016-12-08 11:46:02

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH] arm64: Work around Falkor erratum 1009

On Wed, Dec 07, 2016 at 03:04:31PM -0500, Christopher Covington wrote:
> From: Shanker Donthineni <[email protected]>
>
> During a TLB invalidate sequence targeting the inner shareable
> domain, Falkor may prematurely complete the DSB before all loads
> and stores using the old translation are observed; instruction
> fetches are not subject to the conditions of this erratum.
>
> Signed-off-by: Shanker Donthineni <[email protected]>
> Signed-off-by: Christopher Covington <[email protected]>
> ---
> arch/arm64/Kconfig | 10 +++++++++
> arch/arm64/include/asm/cpucaps.h | 3 ++-
> arch/arm64/include/asm/tlbflush.h | 43 +++++++++++++++++++++++++++++++++++++++
> arch/arm64/kernel/cpu_errata.c | 7 +++++++
> arch/arm64/kvm/hyp/tlb.c | 39 ++++++++++++++++++++++++++++++-----
> 5 files changed, 96 insertions(+), 6 deletions(-)

Please update Documentation/arm64/silicon-errata.txt accordingly.

[...]

> #include <linux/sched.h>
> #include <asm/cputype.h>
> +#include <asm/alternative.h>

Nit: please keep includes (alphabetically) ordered (at least below the
linux/ or asm/ level).

[...]

> + asm volatile(ALTERNATIVE(
> + "nop \n"
> + "nop \n",
> + "tlbi vmalle1is \n"
> + "dsb ish \n",

As a general note, perhaps we want a C compatible NOP_ALTERNATIVE() so
that the nop case can be implicitly generated for sequences like this.

Thanks,
Mark.

2016-12-08 13:27:53

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH] arm64: Work around Falkor erratum 1009

On Thu, Dec 08, 2016 at 11:45:12AM +0000, Mark Rutland wrote:
> On Wed, Dec 07, 2016 at 03:04:31PM -0500, Christopher Covington wrote:
> > + asm volatile(ALTERNATIVE(
> > + "nop \n"
> > + "nop \n",
> > + "tlbi vmalle1is \n"
> > + "dsb ish \n",
>
> As a general note, perhaps we want a C compatible NOP_ALTERNATIVE() so
> that the nop case can be implicitly generated for sequences like this.

It's also worth checking what cpus_have_const_cap() would generate for
the default (no workaround required) case.

--
Catalin

2016-12-19 21:27:34

by Christopher Covington

[permalink] [raw]
Subject: Re: [PATCH 2/3] arm64: Work around Falkor erratum 1003

Hi Catalin,

On 12/08/2016 05:31 AM, Catalin Marinas wrote:
> On Wed, Dec 07, 2016 at 03:00:26PM -0500, Christopher Covington wrote:
>> From: Shanker Donthineni <[email protected]>
>>
>> On the Qualcomm Datacenter Technologies Falkor v1 CPU, memory accesses may
>> allocate TLB entries using an incorrect ASID when TTBRx_EL1 is being
>> updated. Changing the TTBRx_EL1[ASID] and TTBRx_EL1[BADDR] fields
>> separately using a reserved ASID will ensure that there are no TLB entries
>> with incorrect ASID after changing the the ASID.
>>
>> Pseudo code:
>> write TTBRx_EL1[ASID] to a reserved value
>> ISB
>> write TTBRx_EL1[BADDR] to a desired value
>> ISB
>> write TTBRx_EL1[ASID] to a desired value
>> ISB
>
> While the new ASID probably won't have incorrect TLB entries, the
> reserved ASID will have random entries from all over the place. That's
> because in step 1 you change the ASID to the reserved one while leaving
> the old BADDR in place. There is a brief time before changing the ASID
> when speculative page table walks will populate the TLB with entries
> tagged with the reserved ASID. Such entries are never removed during TLB
> shoot-down for the real ASID, so, depending on how this CPU implements
> the walk cache, you could end up with intermediate level entries still
> active and pointing to freed/reused pages. It will eventually hit an
> entry that looks global with weird consequences.
>
> We've been bitten by this in the past on arm32: 52af9c6cd863 ("ARM:
> 6943/1: mm: use TTBR1 instead of reserved context ID").

Thanks for bringing this up, but I'm told the scenario you describe won't
happen on the Falkor 1.0 CPU.

Thanks,
Cov

--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code
Aurora Forum, a Linux Foundation Collaborative Project.