2020-06-02 14:01:21

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 0/6] arm64: tlb: add support for TTL feature

In order to reduce the cost of TLB invalidation, ARMv8.4 provides
the TTL field in TLBI instruction. The TTL field indicates the
level of page table walk holding the leaf entry for the address
being invalidated. This series provide support for this feature.

When ARMv8.4-TTL is implemented, the operand for TLBIs looks like
below:

* +----------+-------+----------------------+
* | ASID | TTL | BADDR |
* +----------+-------+----------------------+
* |63 48|47 44|43 0|

See patches for details, Thanks.

--
ChangeList:
v4:
implement flush_*_tlb_range only on arm64.

v3:
minor changes: reduce the indentation levels of __tlbi_level().

v2:
rebase series on Linux 5.7-rc1 and simplify the code implementation.

v1:
add support for TTL feature in arm64.


Marc Zyngier (2):
arm64: Detect the ARMv8.4 TTL feature
arm64: Add level-hinted TLB invalidation helper

Peter Zijlstra (Intel) (1):
tlb: mmu_gather: add tlb_flush_*_range APIs

Zhenyu Ye (3):
arm64: Add tlbi_user_level TLB invalidation helper
arm64: tlb: Set the TTL field in flush_tlb_range
arm64: tlb: Set the TTL field in flush_*_tlb_range

arch/arm64/include/asm/cpucaps.h | 3 +-
arch/arm64/include/asm/pgtable.h | 10 ++++++
arch/arm64/include/asm/sysreg.h | 1 +
arch/arm64/include/asm/tlb.h | 29 +++++++++++++++-
arch/arm64/include/asm/tlbflush.h | 54 +++++++++++++++++++++++++-----
arch/arm64/kernel/cpufeature.c | 11 +++++++
include/asm-generic/tlb.h | 55 ++++++++++++++++++++++---------
7 files changed, 138 insertions(+), 25 deletions(-)

--
2.19.1



2020-06-02 14:01:33

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 1/6] arm64: Detect the ARMv8.4 TTL feature

From: Marc Zyngier <[email protected]>

In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
feature allows TLBs to be issued with a level allowing for quicker
invalidation.

Let's detect the feature for now. Further patches will implement
its actual usage.

Signed-off-by: Marc Zyngier <[email protected]>
Signed-off-by: Zhenyu Ye <[email protected]>
Reviewed-by: Catalin Marinas <[email protected]>
---
arch/arm64/include/asm/cpucaps.h | 3 ++-
arch/arm64/include/asm/sysreg.h | 1 +
arch/arm64/kernel/cpufeature.c | 11 +++++++++++
3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8eb5a088ae65..cabb0c49a1d1 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -61,7 +61,8 @@
#define ARM64_HAS_AMU_EXTN 51
#define ARM64_HAS_ADDRESS_AUTH 52
#define ARM64_HAS_GENERIC_AUTH 53
+#define ARM64_HAS_ARMv8_4_TTL 54

-#define ARM64_NCAPS 54
+#define ARM64_NCAPS 55

#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c4ac0ac25a00..477d84ba1056 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -725,6 +725,7 @@

/* id_aa64mmfr2 */
#define ID_AA64MMFR2_E0PD_SHIFT 60
+#define ID_AA64MMFR2_TTL_SHIFT 48
#define ID_AA64MMFR2_FWB_SHIFT 40
#define ID_AA64MMFR2_AT_SHIFT 32
#define ID_AA64MMFR2_LVA_SHIFT 16
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9fac745aa7bb..d993dc6dc7d5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -244,6 +244,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {

static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
@@ -1622,6 +1623,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = cpu_has_fwb,
},
+ {
+ .desc = "ARMv8.4 Translation Table Level",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_ARMv8_4_TTL,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_TTL_SHIFT,
+ .min_field_value = 1,
+ .matches = has_cpuid_feature,
+ },
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
--
2.19.1


2020-06-02 14:01:41

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 6/6] arm64: tlb: Set the TTL field in flush_*_tlb_range

This patch implement flush_{pmd|pud}_tlb_range() in arm64 by
calling __flush_tlb_range() with the corresponding stride and
tlb_level values.

Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/pgtable.h | 10 ++++++++++
1 file changed, 10 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 538c85e62f86..bc59814eda64 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);

+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+
+/* Set stride and tlb_level in flush_*_tlb_range */
+#define flush_pmd_tlb_range(vma, addr, end) \
+ __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
+#define flush_pud_tlb_range(vma, addr, end) \
+ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
--
2.19.1


2020-06-02 14:01:46

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 2/6] arm64: Add level-hinted TLB invalidation helper

From: Marc Zyngier <[email protected]>

Add a level-hinted TLB invalidation helper that only gets used if
ARMv8.4-TTL gets detected.

Signed-off-by: Marc Zyngier <[email protected]>
Signed-off-by: Zhenyu Ye <[email protected]>
Reviewed-by: Catalin Marinas <[email protected]>
---
arch/arm64/include/asm/tlbflush.h | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc3949064725..8adbd6fd8489 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -10,6 +10,7 @@

#ifndef __ASSEMBLY__

+#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
@@ -59,6 +60,34 @@
__ta; \
})

+#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+
+#define __tlbi_level(op, addr, level) do { \
+ u64 arg = addr; \
+ \
+ if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
+ level) { \
+ u64 ttl = level; \
+ \
+ switch (PAGE_SIZE) { \
+ case SZ_4K: \
+ ttl |= 1 << 2; \
+ break; \
+ case SZ_16K: \
+ ttl |= 2 << 2; \
+ break; \
+ case SZ_64K: \
+ ttl |= 3 << 2; \
+ break; \
+ } \
+ \
+ arg &= ~TLBI_TTL_MASK; \
+ arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
+ } \
+ \
+ __tlbi(op, arg); \
+} while (0)
+
/*
* TLB Invalidation
* ================
--
2.19.1


2020-06-02 14:02:06

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 4/6] tlb: mmu_gather: add tlb_flush_*_range APIs

From: "Peter Zijlstra (Intel)" <[email protected]>

tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and
tlb->end, then set corresponding cleared_*.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Zhenyu Ye <[email protected]>
Acked-by: Catalin Marinas <[email protected]>
---
include/asm-generic/tlb.h | 55 ++++++++++++++++++++++++++++-----------
1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 3f1649a8cf55..ef75ec86f865 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -512,6 +512,38 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
}
#endif

+/*
+ * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
+ * and set corresponding cleared_*.
+ */
+static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ __tlb_adjust_range(tlb, address, size);
+ tlb->cleared_ptes = 1;
+}
+
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ __tlb_adjust_range(tlb, address, size);
+ tlb->cleared_pmds = 1;
+}
+
+static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ __tlb_adjust_range(tlb, address, size);
+ tlb->cleared_puds = 1;
+}
+
+static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ __tlb_adjust_range(tlb, address, size);
+ tlb->cleared_p4ds = 1;
+}
+
#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif
@@ -525,19 +557,17 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
*/
#define tlb_remove_tlb_entry(tlb, ptep, address) \
do { \
- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
- tlb->cleared_ptes = 1; \
+ tlb_flush_pte_range(tlb, address, PAGE_SIZE); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)

#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
do { \
unsigned long _sz = huge_page_size(h); \
- __tlb_adjust_range(tlb, address, _sz); \
if (_sz == PMD_SIZE) \
- tlb->cleared_pmds = 1; \
+ tlb_flush_pmd_range(tlb, address, _sz); \
else if (_sz == PUD_SIZE) \
- tlb->cleared_puds = 1; \
+ tlb_flush_pud_range(tlb, address, _sz); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)

@@ -551,8 +581,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm

#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
do { \
- __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \
- tlb->cleared_pmds = 1; \
+ tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
} while (0)

@@ -566,8 +595,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm

#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
do { \
- __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
- tlb->cleared_puds = 1; \
+ tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \
__tlb_remove_pud_tlb_entry(tlb, pudp, address); \
} while (0)

@@ -592,9 +620,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#ifndef pte_free_tlb
#define pte_free_tlb(tlb, ptep, address) \
do { \
- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
+ tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \
tlb->freed_tables = 1; \
- tlb->cleared_pmds = 1; \
__pte_free_tlb(tlb, ptep, address); \
} while (0)
#endif
@@ -602,9 +629,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#ifndef pmd_free_tlb
#define pmd_free_tlb(tlb, pmdp, address) \
do { \
- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
+ tlb_flush_pud_range(tlb, address, PAGE_SIZE); \
tlb->freed_tables = 1; \
- tlb->cleared_puds = 1; \
__pmd_free_tlb(tlb, pmdp, address); \
} while (0)
#endif
@@ -612,9 +638,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
#ifndef pud_free_tlb
#define pud_free_tlb(tlb, pudp, address) \
do { \
- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
+ tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \
tlb->freed_tables = 1; \
- tlb->cleared_p4ds = 1; \
__pud_free_tlb(tlb, pudp, address); \
} while (0)
#endif
--
2.19.1


2020-06-02 14:02:27

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 3/6] arm64: Add tlbi_user_level TLB invalidation helper

Add a level-hinted parameter to __tlbi_user, which only gets used
if ARMv8.4-TTL gets detected.

ARMv8.4-TTL provides the TTL field in tlbi instruction to indicate
the level of translation table walk holding the leaf entry for the
address that is being invalidated.

This patch set the default level value of flush_tlb_range() to 0,
which will be updated in future patches. And set the ttl value of
flush_tlb_page_nosync() to 3 because it is only called to flush a
single pte page.

Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/tlbflush.h | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 8adbd6fd8489..bfb58e62c127 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -88,6 +88,12 @@
__tlbi(op, arg); \
} while (0)

+#define __tlbi_user_level(op, arg, level) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi_level(op, (arg | USER_ASID_FLAG), level); \
+} while (0)
+
+
/*
* TLB Invalidation
* ================
@@ -189,8 +195,9 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));

dsb(ishst);
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
+ /* This function is only called on a small page */
+ __tlbi_level(vale1is, addr, 3);
+ __tlbi_user_level(vale1is, addr, 3);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -230,11 +237,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += stride) {
if (last_level) {
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
+ __tlbi_level(vale1is, addr, 0);
+ __tlbi_user_level(vale1is, addr, 0);
} else {
- __tlbi(vae1is, addr);
- __tlbi_user(vae1is, addr);
+ __tlbi_level(vae1is, addr, 0);
+ __tlbi_user_level(vae1is, addr, 0);
}
}
dsb(ish);
--
2.19.1


2020-06-02 14:04:24

by Zhenyu Ye

[permalink] [raw]
Subject: [PATCH v4 5/6] arm64: tlb: Set the TTL field in flush_tlb_range

This patch uses the cleared_* in struct mmu_gather to set the
TTL field in flush_tlb_range().

Signed-off-by: Zhenyu Ye <[email protected]>
Reviewed-by: Catalin Marinas <[email protected]>
---
arch/arm64/include/asm/tlb.h | 29 ++++++++++++++++++++++++++++-
arch/arm64/include/asm/tlbflush.h | 14 ++++++++------
2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index b76df828e6b7..61c97d3b58c7 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb);

#include <asm-generic/tlb.h>

+/*
+ * get the tlbi levels in arm64. Default value is 0 if more than one
+ * of cleared_* is set or neither is set.
+ * Arm64 doesn't support p4ds now.
+ */
+static inline int tlb_get_level(struct mmu_gather *tlb)
+{
+ if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
+ tlb->cleared_puds ||
+ tlb->cleared_p4ds))
+ return 3;
+
+ if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
+ tlb->cleared_puds ||
+ tlb->cleared_p4ds))
+ return 2;
+
+ if (tlb->cleared_puds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_p4ds))
+ return 1;
+
+ return 0;
+}
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
bool last_level = !tlb->freed_tables;
unsigned long stride = tlb_get_unmap_size(tlb);
+ int tlb_level = tlb_get_level(tlb);

/*
* If we're tearing down the address space then we only care about
@@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
return;
}

- __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
+ __flush_tlb_range(&vma, tlb->start, tlb->end, stride,
+ last_level, tlb_level);
}

static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bfb58e62c127..84cb98b60b7b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -215,7 +215,8 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,

static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- unsigned long stride, bool last_level)
+ unsigned long stride, bool last_level,
+ int tlb_level)
{
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
@@ -237,11 +238,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += stride) {
if (last_level) {
- __tlbi_level(vale1is, addr, 0);
- __tlbi_user_level(vale1is, addr, 0);
+ __tlbi_level(vale1is, addr, tlb_level);
+ __tlbi_user_level(vale1is, addr, tlb_level);
} else {
- __tlbi_level(vae1is, addr, 0);
- __tlbi_user_level(vae1is, addr, 0);
+ __tlbi_level(vae1is, addr, tlb_level);
+ __tlbi_user_level(vae1is, addr, tlb_level);
}
}
dsb(ish);
@@ -253,8 +254,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
+ * Set the tlb_level to 0 because we can not get enough information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
--
2.19.1