2018-03-02 22:08:23

by Shanker Donthineni

[permalink] [raw]
Subject: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC
V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses
the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead
of Silicon provider service ID 0xC2001700.

Signed-off-by: Shanker Donthineni <[email protected]>
---
arch/arm64/include/asm/cpucaps.h | 2 +-
arch/arm64/include/asm/kvm_asm.h | 2 --
arch/arm64/kernel/bpi.S | 8 ------
arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++--------------------------
arch/arm64/kvm/hyp/entry.S | 12 ---------
arch/arm64/kvm/hyp/switch.c | 10 --------
6 files changed, 20 insertions(+), 69 deletions(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index bb26382..6ecc249 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -43,7 +43,7 @@
#define ARM64_SVE 22
#define ARM64_UNMAP_KERNEL_AT_EL0 23
#define ARM64_HARDEN_BRANCH_PREDICTOR 24
-#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
+/* #define ARM64_UNALLOCATED_ENTRY 25 */
#define ARM64_HAS_RAS_EXTN 26

#define ARM64_NCAPS 27
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b7..ab4d0a9 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -68,8 +68,6 @@

extern u32 __init_stage2_translation(void);

-extern void __qcom_hyp_sanitize_btac_predictors(void);
-
#endif

#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
index e5de335..dc4eb15 100644
--- a/arch/arm64/kernel/bpi.S
+++ b/arch/arm64/kernel/bpi.S
@@ -55,14 +55,6 @@ ENTRY(__bp_harden_hyp_vecs_start)
.endr
ENTRY(__bp_harden_hyp_vecs_end)

-ENTRY(__qcom_hyp_sanitize_link_stack_start)
- stp x29, x30, [sp, #-16]!
- .rept 16
- bl . + 4
- .endr
- ldp x29, x30, [sp], #16
-ENTRY(__qcom_hyp_sanitize_link_stack_end)
-
.macro smccc_workaround_1 inst
sub sp, sp, #(8 * 4)
stp x2, x3, [sp, #(8 * 0)]
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 52f15cd..d779ffd4 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -67,8 +67,6 @@ static int cpu_enable_trap_ctr_access(void *__unused)
DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);

#ifdef CONFIG_KVM
-extern char __qcom_hyp_sanitize_link_stack_start[];
-extern char __qcom_hyp_sanitize_link_stack_end[];
extern char __smccc_workaround_1_smc_start[];
extern char __smccc_workaround_1_smc_end[];
extern char __smccc_workaround_1_hvc_start[];
@@ -115,8 +113,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
spin_unlock(&bp_lock);
}
#else
-#define __qcom_hyp_sanitize_link_stack_start NULL
-#define __qcom_hyp_sanitize_link_stack_end NULL
#define __smccc_workaround_1_smc_start NULL
#define __smccc_workaround_1_smc_end NULL
#define __smccc_workaround_1_hvc_start NULL
@@ -161,12 +157,25 @@ static void call_hvc_arch_workaround_1(void)
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
}

+static void qcom_link_stack_sanitization(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
+
static int enable_smccc_arch_workaround_1(void *data)
{
const struct arm64_cpu_capabilities *entry = data;
bp_hardening_cb_t cb;
void *smccc_start, *smccc_end;
struct arm_smccc_res res;
+ u32 midr = read_cpuid_id();

if (!entry->matches(entry, SCOPE_LOCAL_CPU))
return 0;
@@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
return 0;
}

+ if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+ ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
+ cb = qcom_link_stack_sanitization;
+
install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);

return 0;
}

-static void qcom_link_stack_sanitization(void)
-{
- u64 tmp;
-
- asm volatile("mov %0, x30 \n"
- ".rept 16 \n"
- "bl . + 4 \n"
- ".endr \n"
- "mov x30, %0 \n"
- : "=&r" (tmp));
-}
-
-static int qcom_enable_link_stack_sanitization(void *data)
-{
- const struct arm64_cpu_capabilities *entry = data;
-
- install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
- __qcom_hyp_sanitize_link_stack_start,
- __qcom_hyp_sanitize_link_stack_end);
-
- return 0;
-}
#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */

#define MIDR_RANGE(model, min, max) \
@@ -400,20 +391,12 @@ static int qcom_enable_link_stack_sanitization(void *data)
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
- .enable = qcom_enable_link_stack_sanitization,
- },
- {
- .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
- MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
- .enable = qcom_enable_link_stack_sanitization,
- },
- {
- .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
- MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fdd1068..56fc2bb 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -213,15 +213,3 @@ alternative_endif

eret
ENDPROC(__fpsimd_guest_restore)
-
-ENTRY(__qcom_hyp_sanitize_btac_predictors)
- /**
- * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700)
- * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls
- * b15-b0: contains SiP functionID
- */
- movz x0, #0x1700
- movk x0, #0xc200, lsl #16
- smc #0
- ret
-ENDPROC(__qcom_hyp_sanitize_btac_predictors)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 870f4b1..d4a336e 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -403,16 +403,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* 0 falls through to be handled out of EL2 */
}

- if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
- u32 midr = read_cpuid_id();
-
- /* Apply BTAC predictors mitigation to all Falkor chips */
- if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
- ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
- __qcom_hyp_sanitize_btac_predictors();
- }
- }
-
fp_enabled = __fpsimd_enabled();

__sysreg_save_guest_state(guest_ctxt);
--
Qualcomm Datacenter Technologies, Inc. on behalf of the Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.



2018-03-05 15:53:54

by Timur Tabi

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

On Fri, Mar 2, 2018 at 3:50 PM, Shanker Donthineni
<[email protected]> wrote:
> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index bb26382..6ecc249 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -43,7 +43,7 @@
> #define ARM64_SVE 22
> #define ARM64_UNMAP_KERNEL_AT_EL0 23
> #define ARM64_HARDEN_BRANCH_PREDICTOR 24
> -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
> +/* #define ARM64_UNALLOCATED_ENTRY 25 */

Why not just delete the entry? Is ARM64_NCAPS never supposed to get smaller?

2018-03-05 15:57:25

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

On Mon, Mar 05, 2018 at 09:52:19AM -0600, Timur Tabi wrote:
> On Fri, Mar 2, 2018 at 3:50 PM, Shanker Donthineni
> <[email protected]> wrote:
> > diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> > index bb26382..6ecc249 100644
> > --- a/arch/arm64/include/asm/cpucaps.h
> > +++ b/arch/arm64/include/asm/cpucaps.h
> > @@ -43,7 +43,7 @@
> > #define ARM64_SVE 22
> > #define ARM64_UNMAP_KERNEL_AT_EL0 23
> > #define ARM64_HARDEN_BRANCH_PREDICTOR 24
> > -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
> > +/* #define ARM64_UNALLOCATED_ENTRY 25 */
>
> Why not just delete the entry?

Deleting the entry, and renumbering subsequent entries is the right
thing to do.

> Is ARM64_NCAPS never supposed to get smaller?

It's internal to the kernel, and it makes sense to keep it as small as
possible. There are datastructures allocated for each entry, like the
static key array.

Thanks,
Mark.

2018-03-05 15:57:33

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

Hi Shanker,

On Fri, Mar 02, 2018 at 03:50:18PM -0600, Shanker Donthineni wrote:
> The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC
> V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses
> the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead
> of Silicon provider service ID 0xC2001700.
>
> Signed-off-by: Shanker Donthineni <[email protected]>
> ---
> arch/arm64/include/asm/cpucaps.h | 2 +-
> arch/arm64/include/asm/kvm_asm.h | 2 --
> arch/arm64/kernel/bpi.S | 8 ------
> arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++--------------------------
> arch/arm64/kvm/hyp/entry.S | 12 ---------
> arch/arm64/kvm/hyp/switch.c | 10 --------
> 6 files changed, 20 insertions(+), 69 deletions(-)

I'm happy to take this via arm64 if I get an ack from Marc/Christoffer.

> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index bb26382..6ecc249 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -43,7 +43,7 @@
> #define ARM64_SVE 22
> #define ARM64_UNMAP_KERNEL_AT_EL0 23
> #define ARM64_HARDEN_BRANCH_PREDICTOR 24
> -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
> +/* #define ARM64_UNALLOCATED_ENTRY 25 */
> #define ARM64_HAS_RAS_EXTN 26
>
> #define ARM64_NCAPS 27

These aren't ABI, so I think you can just drop
ARM64_HARDEN_BP_POST_GUEST_EXIT and repack the others accordingly.

> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 24961b7..ab4d0a9 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -68,8 +68,6 @@
>
> extern u32 __init_stage2_translation(void);
>
> -extern void __qcom_hyp_sanitize_btac_predictors(void);
> -
> #endif
>
> #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
> index e5de335..dc4eb15 100644
> --- a/arch/arm64/kernel/bpi.S
> +++ b/arch/arm64/kernel/bpi.S
> @@ -55,14 +55,6 @@ ENTRY(__bp_harden_hyp_vecs_start)
> .endr
> ENTRY(__bp_harden_hyp_vecs_end)
>
> -ENTRY(__qcom_hyp_sanitize_link_stack_start)
> - stp x29, x30, [sp, #-16]!
> - .rept 16
> - bl . + 4
> - .endr
> - ldp x29, x30, [sp], #16
> -ENTRY(__qcom_hyp_sanitize_link_stack_end)
> -
> .macro smccc_workaround_1 inst
> sub sp, sp, #(8 * 4)
> stp x2, x3, [sp, #(8 * 0)]
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 52f15cd..d779ffd4 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -67,8 +67,6 @@ static int cpu_enable_trap_ctr_access(void *__unused)
> DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
>
> #ifdef CONFIG_KVM
> -extern char __qcom_hyp_sanitize_link_stack_start[];
> -extern char __qcom_hyp_sanitize_link_stack_end[];
> extern char __smccc_workaround_1_smc_start[];
> extern char __smccc_workaround_1_smc_end[];
> extern char __smccc_workaround_1_hvc_start[];
> @@ -115,8 +113,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
> spin_unlock(&bp_lock);
> }
> #else
> -#define __qcom_hyp_sanitize_link_stack_start NULL
> -#define __qcom_hyp_sanitize_link_stack_end NULL
> #define __smccc_workaround_1_smc_start NULL
> #define __smccc_workaround_1_smc_end NULL
> #define __smccc_workaround_1_hvc_start NULL
> @@ -161,12 +157,25 @@ static void call_hvc_arch_workaround_1(void)
> arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
> }
>
> +static void qcom_link_stack_sanitization(void)
> +{
> + u64 tmp;
> +
> + asm volatile("mov %0, x30 \n"
> + ".rept 16 \n"
> + "bl . + 4 \n"
> + ".endr \n"
> + "mov x30, %0 \n"
> + : "=&r" (tmp));
> +}
> +
> static int enable_smccc_arch_workaround_1(void *data)
> {
> const struct arm64_cpu_capabilities *entry = data;
> bp_hardening_cb_t cb;
> void *smccc_start, *smccc_end;
> struct arm_smccc_res res;
> + u32 midr = read_cpuid_id();
>
> if (!entry->matches(entry, SCOPE_LOCAL_CPU))
> return 0;
> @@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
> return 0;
> }
>
> + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
> + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
> + cb = qcom_link_stack_sanitization;

Is this just a performance thing? Do you actually see an advantage over
always making the firmware call? We've seen minimal impact in our testing.

Will

2018-03-05 16:59:55

by Shanker Donthineni

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

Hi Will,

On 03/05/2018 09:56 AM, Will Deacon wrote:
> Hi Shanker,
>
> On Fri, Mar 02, 2018 at 03:50:18PM -0600, Shanker Donthineni wrote:
>> The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC
>> V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses
>> the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead
>> of Silicon provider service ID 0xC2001700.
>>
>> Signed-off-by: Shanker Donthineni <[email protected]>
>> ---
>> arch/arm64/include/asm/cpucaps.h | 2 +-
>> arch/arm64/include/asm/kvm_asm.h | 2 --
>> arch/arm64/kernel/bpi.S | 8 ------
>> arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++--------------------------
>> arch/arm64/kvm/hyp/entry.S | 12 ---------
>> arch/arm64/kvm/hyp/switch.c | 10 --------
>> 6 files changed, 20 insertions(+), 69 deletions(-)
>
> I'm happy to take this via arm64 if I get an ack from Marc/Christoffer.
>
>> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
>> index bb26382..6ecc249 100644
>> --- a/arch/arm64/include/asm/cpucaps.h
>> +++ b/arch/arm64/include/asm/cpucaps.h
>> @@ -43,7 +43,7 @@
>> #define ARM64_SVE 22
>> #define ARM64_UNMAP_KERNEL_AT_EL0 23
>> #define ARM64_HARDEN_BRANCH_PREDICTOR 24
>> -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
>> +/* #define ARM64_UNALLOCATED_ENTRY 25 */
>> #define ARM64_HAS_RAS_EXTN 26
>>
>> #define ARM64_NCAPS 27
>
> These aren't ABI, so I think you can just drop
> ARM64_HARDEN_BP_POST_GUEST_EXIT and repack the others accordingly.
>
Sure, I'll remove it completely in v2 patch.

>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>> index 24961b7..ab4d0a9 100644
>> --- a/arch/arm64/include/asm/kvm_asm.h
>> +++ b/arch/arm64/include/asm/kvm_asm.h
>> @@ -68,8 +68,6 @@
>>
>> extern u32 __init_stage2_translation(void);
>>
>> -extern void __qcom_hyp_sanitize_btac_predictors(void);
>> -
>> #endif
>>
>> #endif /* __ARM_KVM_ASM_H__ */
>> diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
>> index e5de335..dc4eb15 100644
>> --- a/arch/arm64/kernel/bpi.S
>> +++ b/arch/arm64/kernel/bpi.S
>> @@ -55,14 +55,6 @@ ENTRY(__bp_harden_hyp_vecs_start)
>> .endr
>> ENTRY(__bp_harden_hyp_vecs_end)
>>
>> -ENTRY(__qcom_hyp_sanitize_link_stack_start)
>> - stp x29, x30, [sp, #-16]!
>> - .rept 16
>> - bl . + 4
>> - .endr
>> - ldp x29, x30, [sp], #16
>> -ENTRY(__qcom_hyp_sanitize_link_stack_end)
>> -
>> .macro smccc_workaround_1 inst
>> sub sp, sp, #(8 * 4)
>> stp x2, x3, [sp, #(8 * 0)]
>> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
>> index 52f15cd..d779ffd4 100644
>> --- a/arch/arm64/kernel/cpu_errata.c
>> +++ b/arch/arm64/kernel/cpu_errata.c
>> @@ -67,8 +67,6 @@ static int cpu_enable_trap_ctr_access(void *__unused)
>> DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
>>
>> #ifdef CONFIG_KVM
>> -extern char __qcom_hyp_sanitize_link_stack_start[];
>> -extern char __qcom_hyp_sanitize_link_stack_end[];
>> extern char __smccc_workaround_1_smc_start[];
>> extern char __smccc_workaround_1_smc_end[];
>> extern char __smccc_workaround_1_hvc_start[];
>> @@ -115,8 +113,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
>> spin_unlock(&bp_lock);
>> }
>> #else
>> -#define __qcom_hyp_sanitize_link_stack_start NULL
>> -#define __qcom_hyp_sanitize_link_stack_end NULL
>> #define __smccc_workaround_1_smc_start NULL
>> #define __smccc_workaround_1_smc_end NULL
>> #define __smccc_workaround_1_hvc_start NULL
>> @@ -161,12 +157,25 @@ static void call_hvc_arch_workaround_1(void)
>> arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
>> }
>>
>> +static void qcom_link_stack_sanitization(void)
>> +{
>> + u64 tmp;
>> +
>> + asm volatile("mov %0, x30 \n"
>> + ".rept 16 \n"
>> + "bl . + 4 \n"
>> + ".endr \n"
>> + "mov x30, %0 \n"
>> + : "=&r" (tmp));
>> +}
>> +
>> static int enable_smccc_arch_workaround_1(void *data)
>> {
>> const struct arm64_cpu_capabilities *entry = data;
>> bp_hardening_cb_t cb;
>> void *smccc_start, *smccc_end;
>> struct arm_smccc_res res;
>> + u32 midr = read_cpuid_id();
>>
>> if (!entry->matches(entry, SCOPE_LOCAL_CPU))
>> return 0;
>> @@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
>> return 0;
>> }
>>
>> + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
>> + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
>> + cb = qcom_link_stack_sanitization;
>
> Is this just a performance thing? Do you actually see an advantage over
> always making the firmware call? We've seen minimal impact in our testing.
>

Yes, we've couple of advantages using the standard SMCCC_ARCH_WOKAROUND_1 framework.
- Improves the code readability.
- Avoid the unnecessary MIDR checks on each vCPU exit.
- Validates ID_AA64PFR0_CVS2 feature for Falkor chips.
- Avoids the 2nd link stack sanitization workaround in firmware.


> Will
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>

--
Shanker Donthineni
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

2018-03-05 17:16:44

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

On Mon, Mar 05, 2018 at 10:57:58AM -0600, Shanker Donthineni wrote:
> Hi Will,
>
> On 03/05/2018 09:56 AM, Will Deacon wrote:
> > Hi Shanker,
> >
> > On Fri, Mar 02, 2018 at 03:50:18PM -0600, Shanker Donthineni wrote:
> >> The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC
> >> V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses
> >> the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead
> >> of Silicon provider service ID 0xC2001700.
> >>
> >> Signed-off-by: Shanker Donthineni <[email protected]>
> >> ---
> >> arch/arm64/include/asm/cpucaps.h | 2 +-
> >> arch/arm64/include/asm/kvm_asm.h | 2 --
> >> arch/arm64/kernel/bpi.S | 8 ------
> >> arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++--------------------------
> >> arch/arm64/kvm/hyp/entry.S | 12 ---------
> >> arch/arm64/kvm/hyp/switch.c | 10 --------
> >> 6 files changed, 20 insertions(+), 69 deletions(-)
> >
> > I'm happy to take this via arm64 if I get an ack from Marc/Christoffer.
> >
> >> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> >> index bb26382..6ecc249 100644
> >> --- a/arch/arm64/include/asm/cpucaps.h
> >> +++ b/arch/arm64/include/asm/cpucaps.h
> >> @@ -43,7 +43,7 @@
> >> #define ARM64_SVE 22
> >> #define ARM64_UNMAP_KERNEL_AT_EL0 23
> >> #define ARM64_HARDEN_BRANCH_PREDICTOR 24
> >> -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
> >> +/* #define ARM64_UNALLOCATED_ENTRY 25 */
> >> #define ARM64_HAS_RAS_EXTN 26
> >>
> >> #define ARM64_NCAPS 27
> >
> > These aren't ABI, so I think you can just drop
> > ARM64_HARDEN_BP_POST_GUEST_EXIT and repack the others accordingly.
> >
> Sure, I'll remove it completely in v2 patch.
>
> >> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> >> index 24961b7..ab4d0a9 100644
> >> --- a/arch/arm64/include/asm/kvm_asm.h
> >> +++ b/arch/arm64/include/asm/kvm_asm.h
> >> @@ -68,8 +68,6 @@
> >>
> >> extern u32 __init_stage2_translation(void);
> >>
> >> -extern void __qcom_hyp_sanitize_btac_predictors(void);
> >> -
> >> #endif
> >>
> >> #endif /* __ARM_KVM_ASM_H__ */
> >> diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
> >> index e5de335..dc4eb15 100644
> >> --- a/arch/arm64/kernel/bpi.S
> >> +++ b/arch/arm64/kernel/bpi.S
> >> @@ -55,14 +55,6 @@ ENTRY(__bp_harden_hyp_vecs_start)
> >> .endr
> >> ENTRY(__bp_harden_hyp_vecs_end)
> >>
> >> -ENTRY(__qcom_hyp_sanitize_link_stack_start)
> >> - stp x29, x30, [sp, #-16]!
> >> - .rept 16
> >> - bl . + 4
> >> - .endr
> >> - ldp x29, x30, [sp], #16
> >> -ENTRY(__qcom_hyp_sanitize_link_stack_end)
> >> -
> >> .macro smccc_workaround_1 inst
> >> sub sp, sp, #(8 * 4)
> >> stp x2, x3, [sp, #(8 * 0)]
> >> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> >> index 52f15cd..d779ffd4 100644
> >> --- a/arch/arm64/kernel/cpu_errata.c
> >> +++ b/arch/arm64/kernel/cpu_errata.c
> >> @@ -67,8 +67,6 @@ static int cpu_enable_trap_ctr_access(void *__unused)
> >> DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
> >>
> >> #ifdef CONFIG_KVM
> >> -extern char __qcom_hyp_sanitize_link_stack_start[];
> >> -extern char __qcom_hyp_sanitize_link_stack_end[];
> >> extern char __smccc_workaround_1_smc_start[];
> >> extern char __smccc_workaround_1_smc_end[];
> >> extern char __smccc_workaround_1_hvc_start[];
> >> @@ -115,8 +113,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
> >> spin_unlock(&bp_lock);
> >> }
> >> #else
> >> -#define __qcom_hyp_sanitize_link_stack_start NULL
> >> -#define __qcom_hyp_sanitize_link_stack_end NULL
> >> #define __smccc_workaround_1_smc_start NULL
> >> #define __smccc_workaround_1_smc_end NULL
> >> #define __smccc_workaround_1_hvc_start NULL
> >> @@ -161,12 +157,25 @@ static void call_hvc_arch_workaround_1(void)
> >> arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
> >> }
> >>
> >> +static void qcom_link_stack_sanitization(void)
> >> +{
> >> + u64 tmp;
> >> +
> >> + asm volatile("mov %0, x30 \n"
> >> + ".rept 16 \n"
> >> + "bl . + 4 \n"
> >> + ".endr \n"
> >> + "mov x30, %0 \n"
> >> + : "=&r" (tmp));
> >> +}
> >> +
> >> static int enable_smccc_arch_workaround_1(void *data)
> >> {
> >> const struct arm64_cpu_capabilities *entry = data;
> >> bp_hardening_cb_t cb;
> >> void *smccc_start, *smccc_end;
> >> struct arm_smccc_res res;
> >> + u32 midr = read_cpuid_id();
> >>
> >> if (!entry->matches(entry, SCOPE_LOCAL_CPU))
> >> return 0;
> >> @@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
> >> return 0;
> >> }
> >>
> >> + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
> >> + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
> >> + cb = qcom_link_stack_sanitization;
> >
> > Is this just a performance thing? Do you actually see an advantage over
> > always making the firmware call? We've seen minimal impact in our testing.
> >
>
> Yes, we've couple of advantages using the standard SMCCC_ARCH_WOKAROUND_1 framework.
> - Improves the code readability.
> - Avoid the unnecessary MIDR checks on each vCPU exit.
> - Validates ID_AA64PFR0_CVS2 feature for Falkor chips.
> - Avoids the 2nd link stack sanitization workaround in firmware.

What I mean is, can we drop qcom_link_stack_sanitization altogether and
use the SMCCC interface for everything?

Will

2018-03-05 18:05:44

by Shanker Donthineni

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

Hi Will,

On 03/05/2018 11:15 AM, Will Deacon wrote:
> On Mon, Mar 05, 2018 at 10:57:58AM -0600, Shanker Donthineni wrote:
>> Hi Will,
>>
>> On 03/05/2018 09:56 AM, Will Deacon wrote:
>>> Hi Shanker,
>>>
>>> On Fri, Mar 02, 2018 at 03:50:18PM -0600, Shanker Donthineni wrote:
>>>> The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC
>>>> V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses
>>>> the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead
>>>> of Silicon provider service ID 0xC2001700.
>>>>
>>>> Signed-off-by: Shanker Donthineni <[email protected]>
>>>> ---
>>>> arch/arm64/include/asm/cpucaps.h | 2 +-
>>>> arch/arm64/include/asm/kvm_asm.h | 2 --
>>>> arch/arm64/kernel/bpi.S | 8 ------
>>>> arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++--------------------------
>>>> arch/arm64/kvm/hyp/entry.S | 12 ---------
>>>> arch/arm64/kvm/hyp/switch.c | 10 --------
>>>> 6 files changed, 20 insertions(+), 69 deletions(-)
>>>
>>> I'm happy to take this via arm64 if I get an ack from Marc/Christoffer.
>>>
>>>> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
>>>> index bb26382..6ecc249 100644
>>>> --- a/arch/arm64/include/asm/cpucaps.h
>>>> +++ b/arch/arm64/include/asm/cpucaps.h
>>>> @@ -43,7 +43,7 @@
>>>> #define ARM64_SVE 22
>>>> #define ARM64_UNMAP_KERNEL_AT_EL0 23
>>>> #define ARM64_HARDEN_BRANCH_PREDICTOR 24
>>>> -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
>>>> +/* #define ARM64_UNALLOCATED_ENTRY 25 */
>>>> #define ARM64_HAS_RAS_EXTN 26
>>>>
>>>> #define ARM64_NCAPS 27
>>>
>>> These aren't ABI, so I think you can just drop
>>> ARM64_HARDEN_BP_POST_GUEST_EXIT and repack the others accordingly.
>>>
>> Sure, I'll remove it completely in v2 patch.
>>
>>>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>>>> index 24961b7..ab4d0a9 100644
>>>> --- a/arch/arm64/include/asm/kvm_asm.h
>>>> +++ b/arch/arm64/include/asm/kvm_asm.h
>>>> @@ -68,8 +68,6 @@
>>>>
>>>> extern u32 __init_stage2_translation(void);
>>>>
>>>> -extern void __qcom_hyp_sanitize_btac_predictors(void);
>>>> -
>>>> #endif
>>>>
>>>> #endif /* __ARM_KVM_ASM_H__ */
>>>> diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
>>>> index e5de335..dc4eb15 100644
>>>> --- a/arch/arm64/kernel/bpi.S
>>>> +++ b/arch/arm64/kernel/bpi.S
>>>> @@ -55,14 +55,6 @@ ENTRY(__bp_harden_hyp_vecs_start)
>>>> .endr
>>>> ENTRY(__bp_harden_hyp_vecs_end)
>>>>
>>>> -ENTRY(__qcom_hyp_sanitize_link_stack_start)
>>>> - stp x29, x30, [sp, #-16]!
>>>> - .rept 16
>>>> - bl . + 4
>>>> - .endr
>>>> - ldp x29, x30, [sp], #16
>>>> -ENTRY(__qcom_hyp_sanitize_link_stack_end)
>>>> -
>>>> .macro smccc_workaround_1 inst
>>>> sub sp, sp, #(8 * 4)
>>>> stp x2, x3, [sp, #(8 * 0)]
>>>> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
>>>> index 52f15cd..d779ffd4 100644
>>>> --- a/arch/arm64/kernel/cpu_errata.c
>>>> +++ b/arch/arm64/kernel/cpu_errata.c
>>>> @@ -67,8 +67,6 @@ static int cpu_enable_trap_ctr_access(void *__unused)
>>>> DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
>>>>
>>>> #ifdef CONFIG_KVM
>>>> -extern char __qcom_hyp_sanitize_link_stack_start[];
>>>> -extern char __qcom_hyp_sanitize_link_stack_end[];
>>>> extern char __smccc_workaround_1_smc_start[];
>>>> extern char __smccc_workaround_1_smc_end[];
>>>> extern char __smccc_workaround_1_hvc_start[];
>>>> @@ -115,8 +113,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
>>>> spin_unlock(&bp_lock);
>>>> }
>>>> #else
>>>> -#define __qcom_hyp_sanitize_link_stack_start NULL
>>>> -#define __qcom_hyp_sanitize_link_stack_end NULL
>>>> #define __smccc_workaround_1_smc_start NULL
>>>> #define __smccc_workaround_1_smc_end NULL
>>>> #define __smccc_workaround_1_hvc_start NULL
>>>> @@ -161,12 +157,25 @@ static void call_hvc_arch_workaround_1(void)
>>>> arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
>>>> }
>>>>
>>>> +static void qcom_link_stack_sanitization(void)
>>>> +{
>>>> + u64 tmp;
>>>> +
>>>> + asm volatile("mov %0, x30 \n"
>>>> + ".rept 16 \n"
>>>> + "bl . + 4 \n"
>>>> + ".endr \n"
>>>> + "mov x30, %0 \n"
>>>> + : "=&r" (tmp));
>>>> +}
>>>> +
>>>> static int enable_smccc_arch_workaround_1(void *data)
>>>> {
>>>> const struct arm64_cpu_capabilities *entry = data;
>>>> bp_hardening_cb_t cb;
>>>> void *smccc_start, *smccc_end;
>>>> struct arm_smccc_res res;
>>>> + u32 midr = read_cpuid_id();
>>>>
>>>> if (!entry->matches(entry, SCOPE_LOCAL_CPU))
>>>> return 0;
>>>> @@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
>>>> return 0;
>>>> }
>>>>
>>>> + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
>>>> + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
>>>> + cb = qcom_link_stack_sanitization;
>>>
>>> Is this just a performance thing? Do you actually see an advantage over
>>> always making the firmware call? We've seen minimal impact in our testing.
>>>
>>
>> Yes, we've couple of advantages using the standard SMCCC_ARCH_WOKAROUND_1 framework.
>> - Improves the code readability.
>> - Avoid the unnecessary MIDR checks on each vCPU exit.
>> - Validates ID_AA64PFR0_CVS2 feature for Falkor chips.
>> - Avoids the 2nd link stack sanitization workaround in firmware.
>
> What I mean is, can we drop qcom_link_stack_sanitization altogether and
> use the SMCCC interface for everything?
>

No, We would like to keep it qcom_link_stack_sanitization for host kernel
since it takes a few CPU cycles instead of heavyweight SMCCC call.

I posted v2 patch https://patchwork.kernel.org/patch/10259357/ to address
cpucaps.h cleanup review comments.

> Will
>

--
Shanker Donthineni
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.

2018-03-06 15:27:08

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

On Mon, Mar 05, 2018 at 12:03:33PM -0600, Shanker Donthineni wrote:
> On 03/05/2018 11:15 AM, Will Deacon wrote:
> > On Mon, Mar 05, 2018 at 10:57:58AM -0600, Shanker Donthineni wrote:
> >> On 03/05/2018 09:56 AM, Will Deacon wrote:
> >>> On Fri, Mar 02, 2018 at 03:50:18PM -0600, Shanker Donthineni wrote:
> >>>> @@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
> >>>> return 0;
> >>>> }
> >>>>
> >>>> + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
> >>>> + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
> >>>> + cb = qcom_link_stack_sanitization;
> >>>
> >>> Is this just a performance thing? Do you actually see an advantage over
> >>> always making the firmware call? We've seen minimal impact in our testing.
> >>>
> >>
> >> Yes, we've couple of advantages using the standard SMCCC_ARCH_WOKAROUND_1 framework.
> >> - Improves the code readability.
> >> - Avoid the unnecessary MIDR checks on each vCPU exit.
> >> - Validates ID_AA64PFR0_CVS2 feature for Falkor chips.
> >> - Avoids the 2nd link stack sanitization workaround in firmware.
> >
> > What I mean is, can we drop qcom_link_stack_sanitization altogether and
> > use the SMCCC interface for everything?
> >
>
> No, We would like to keep it qcom_link_stack_sanitization for host kernel
> since it takes a few CPU cycles instead of heavyweight SMCCC call.

Is that something that you can actually measure in the workloads and
benchmarks that you care about? If so, fine, but that doesn't seem to be the
case for the Cortex cores we've looked at internally and it would be nice to
avoid having different workarounds in the kernel just because the SMCCC
interface wasn't baked in time, rather than because there's a meaningful
performance difference.

Will

2018-03-10 18:28:22

by Shanker Donthineni

[permalink] [raw]
Subject: Re: [PATCH] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening

Hi Will,

On 03/06/2018 09:25 AM, Will Deacon wrote:
> On Mon, Mar 05, 2018 at 12:03:33PM -0600, Shanker Donthineni wrote:
>> On 03/05/2018 11:15 AM, Will Deacon wrote:
>>> On Mon, Mar 05, 2018 at 10:57:58AM -0600, Shanker Donthineni wrote:
>>>> On 03/05/2018 09:56 AM, Will Deacon wrote:
>>>>> On Fri, Mar 02, 2018 at 03:50:18PM -0600, Shanker Donthineni wrote:
>>>>>> @@ -199,33 +208,15 @@ static int enable_smccc_arch_workaround_1(void *data)
>>>>>> return 0;
>>>>>> }
>>>>>>
>>>>>> + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
>>>>>> + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
>>>>>> + cb = qcom_link_stack_sanitization;
>>>>>
>>>>> Is this just a performance thing? Do you actually see an advantage over
>>>>> always making the firmware call? We've seen minimal impact in our testing.
>>>>>
>>>>
>>>> Yes, we've couple of advantages using the standard SMCCC_ARCH_WOKAROUND_1 framework.
>>>> - Improves the code readability.
>>>> - Avoid the unnecessary MIDR checks on each vCPU exit.
>>>> - Validates ID_AA64PFR0_CVS2 feature for Falkor chips.
>>>> - Avoids the 2nd link stack sanitization workaround in firmware.
>>>
>>> What I mean is, can we drop qcom_link_stack_sanitization altogether and
>>> use the SMCCC interface for everything?
>>>
>>
>> No, We would like to keep it qcom_link_stack_sanitization for host kernel
>> since it takes a few CPU cycles instead of heavyweight SMCCC call.
>
> Is that something that you can actually measure in the workloads and
> benchmarks that you care about? If so, fine, but that doesn't seem to be the
> case for the Cortex cores we've looked at internally and it would be nice to
> avoid having different workarounds in the kernel just because the SMCCC
> interface wasn't baked in time, rather than because there's a meaningful
> performance difference.
>

We've seen noticeable performance improvement with the microbench workloads,
ans also some of our customers have observed improvements on heavy workloads.
Unfortunately I can't share those specific results here. SMCCC call overhead
is much higher as compared to link stack workaround on Falkor, ~99X.

Host kernel workaround takes less than ~20 CPU cycles, whereas SMCCC_ARCH_WOKAROUND_1
consumes thousands of CPU cycles to sanitize the branch prediction on Falkor.

Especially workloads inside virtual machines provides much better results because
no KVM involvement is required whenever guest calls qcom_link_stack_sanitization().

> Will
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>

--
Shanker Donthineni
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.