In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
feature allows TLB invalidation instructions to be issued with a
level hint, allowing for quicker invalidation. This series provides
support for this feature.
Patches 1 and 2 were provided by Marc in his NV series[1] patches,
which detect the TTL feature and add the __tlbi_level interface.
See the patches for details. Thanks.
[1] https://lore.kernel.org/linux-arm-kernel/[email protected]/
Changelog:
v1:
add support for the TTL feature on arm64.
v2:
build the patches on Marc's NV series[1].
v3:
use vma->vm_flags instead of mm->context.flags.
v4:
add Marc's patches into my series.
Marc Zyngier (2):
arm64: Detect the ARMv8.4 TTL feature
arm64: Add level-hinted TLB invalidation helper
Zhenyu Ye (4):
arm64: Add level-hinted TLB invalidation helper to tlbi_user
mm: Add page table level flags to vm_flags
arm64: tlb: Use translation level hint in vm_flags
mm: Set VM_LEVEL flags in some tlb_flush functions
arch/arm64/include/asm/cpucaps.h | 3 +-
arch/arm64/include/asm/mmu.h | 2 +
arch/arm64/include/asm/sysreg.h | 1 +
arch/arm64/include/asm/tlb.h | 12 +++++
arch/arm64/include/asm/tlbflush.h | 74 ++++++++++++++++++++++++++++---
arch/arm64/kernel/cpufeature.c | 11 +++++
arch/arm64/mm/hugetlbpage.c | 4 +-
arch/arm64/mm/mmu.c | 14 ++++++
include/asm-generic/pgtable.h | 16 ++++++-
include/linux/mm.h | 10 +++++
include/trace/events/mmflags.h | 15 ++++++-
mm/huge_memory.c | 8 +++-
12 files changed, 157 insertions(+), 13 deletions(-)
--
2.19.1
From: Marc Zyngier <[email protected]>
Add a level-hinted TLB invalidation helper that is only used when
ARMv8.4-TTL is detected.
Signed-off-by: Marc Zyngier <[email protected]>
Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/tlbflush.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc3949064725..a3f70778a325 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -10,6 +10,7 @@
#ifndef __ASSEMBLY__
+#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
@@ -59,6 +60,35 @@
__ta; \
})
+#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+
+#define __tlbi_level(op, addr, level) \
+ do { \
+ u64 arg = addr; \
+ \
+ if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
+ level) { \
+ u64 ttl = level; \
+ \
+ switch (PAGE_SIZE) { \
+ case SZ_4K: \
+ ttl |= 1 << 2; \
+ break; \
+ case SZ_16K: \
+ ttl |= 2 << 2; \
+ break; \
+ case SZ_64K: \
+ ttl |= 3 << 2; \
+ break; \
+ } \
+ \
+ arg &= ~TLBI_TTL_MASK; \
+ arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
+ } \
+ \
+ __tlbi(op, arg); \
+ } while(0)
+
/*
* TLB Invalidation
* ================
--
2.19.1
Add a level-hinted parameter to __tlbi_user, which is only used
when ARMv8.4-TTL is detected.
ARMv8.4-TTL provides a TTL field in the TLBI instructions to
indicate the level of the translation table walk holding the leaf
entry for the address that is being invalidated.
This patch sets the default level value to 0.
Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/tlbflush.h | 42 ++++++++++++++++++++++++++-----
1 file changed, 36 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a3f70778a325..d141c080e494 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -89,6 +89,36 @@
__tlbi(op, arg); \
} while(0)
+#define __tlbi_user_level(op, addr, level) \
+ do { \
+ u64 arg = addr; \
+ \
+ if (!arm64_kernel_unmapped_at_el0()) \
+ break; \
+ \
+ if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
+ level) { \
+ u64 ttl = level; \
+ \
+ switch (PAGE_SIZE) { \
+ case SZ_4K: \
+ ttl |= 1 << 2; \
+ break; \
+ case SZ_16K: \
+ ttl |= 2 << 2; \
+ break; \
+ case SZ_64K: \
+ ttl |= 3 << 2; \
+ break; \
+ } \
+ \
+ arg &= ~TLBI_TTL_MASK; \
+ arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
+ } \
+ \
+ __tlbi(op, (arg) | USER_ASID_FLAG); \
+ } while (0)
+
/*
* TLB Invalidation
* ================
@@ -190,8 +220,8 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
dsb(ishst);
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
+ __tlbi_level(vale1is, addr, 0);
+ __tlbi_user_level(vale1is, addr, 0);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -231,11 +261,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += stride) {
if (last_level) {
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
+ __tlbi_level(vale1is, addr, 0);
+ __tlbi_user_level(vale1is, addr, 0);
} else {
- __tlbi(vae1is, addr);
- __tlbi_user(vae1is, addr);
+ __tlbi_level(vae1is, addr, 0);
+ __tlbi_user_level(vae1is, addr, 0);
}
}
dsb(ish);
--
2.19.1
This patch uses the VM_LEVEL flags in vma->vm_flags to set the
TTL field in the TLBI instruction.
Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/mmu.h | 2 ++
arch/arm64/include/asm/tlbflush.h | 14 ++++++++------
arch/arm64/mm/mmu.c | 14 ++++++++++++++
3 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index d79ce6df9e12..a8b8824a7405 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -86,6 +86,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
extern bool kaslr_requires_kpti(void);
+extern unsigned int get_vma_level(struct vm_area_struct *vma);
+
#define INIT_MM_CONTEXT(name) \
.pgd = init_pg_dir,
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index d141c080e494..93bb09fdfafd 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -218,10 +218,11 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
unsigned long uaddr)
{
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+ unsigned int level = get_vma_level(vma);
dsb(ishst);
- __tlbi_level(vale1is, addr, 0);
- __tlbi_user_level(vale1is, addr, 0);
+ __tlbi_level(vale1is, addr, level);
+ __tlbi_user_level(vale1is, addr, level);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -242,6 +243,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long stride, bool last_level)
{
unsigned long asid = ASID(vma->vm_mm);
+ unsigned int level = get_vma_level(vma);
unsigned long addr;
start = round_down(start, stride);
@@ -261,11 +263,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += stride) {
if (last_level) {
- __tlbi_level(vale1is, addr, 0);
- __tlbi_user_level(vale1is, addr, 0);
+ __tlbi_level(vale1is, addr, level);
+ __tlbi_user_level(vale1is, addr, level);
} else {
- __tlbi_level(vae1is, addr, 0);
- __tlbi_user_level(vae1is, addr, 0);
+ __tlbi_level(vae1is, addr, level);
+ __tlbi_user_level(vae1is, addr, level);
}
}
dsb(ish);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 128f70852bf3..e6a1221cd86b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -60,6 +60,20 @@ static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
static DEFINE_SPINLOCK(swapper_pgdir_lock);
+inline unsigned int get_vma_level(struct vm_area_struct *vma)
+{
+ unsigned int level = 0;
+ if (vma->vm_flags & VM_LEVEL_PUD)
+ level = 1;
+ else if (vma->vm_flags & VM_LEVEL_PMD)
+ level = 2;
+ else if (vma->vm_flags & VM_LEVEL_PTE)
+ level = 3;
+
+ vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PMD | VM_LEVEL_PTE);
+ return level;
+}
+
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
pgd_t *fixmap_pgdp;
--
2.19.1
From: Marc Zyngier <[email protected]>
In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
feature allows TLB invalidation instructions to be issued with a
level hint, allowing for quicker invalidation.
Let's detect the feature for now. Further patches will implement
its actual usage.
Signed-off-by: Marc Zyngier <[email protected]>
Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/cpucaps.h | 3 ++-
arch/arm64/include/asm/sysreg.h | 1 +
arch/arm64/kernel/cpufeature.c | 11 +++++++++++
3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 865e0253fc1e..8b3b4dd612b3 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -58,7 +58,8 @@
#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48
#define ARM64_HAS_E0PD 49
#define ARM64_HAS_RNG 50
+#define ARM64_HAS_ARMv8_4_TTL 51
-#define ARM64_NCAPS 51
+#define ARM64_NCAPS 52
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b91570ff9db1..a28b76f32ba7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -685,6 +685,7 @@
/* id_aa64mmfr2 */
#define ID_AA64MMFR2_E0PD_SHIFT 60
+#define ID_AA64MMFR2_TTL_SHIFT 48
#define ID_AA64MMFR2_FWB_SHIFT 40
#define ID_AA64MMFR2_AT_SHIFT 32
#define ID_AA64MMFR2_LVA_SHIFT 16
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0b6715625cf6..cbe46ad2900a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -241,6 +241,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
@@ -1523,6 +1524,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = cpu_has_fwb,
},
+ {
+ .desc = "ARMv8.4 Translation Table Level",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_ARMv8_4_TTL,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_TTL_SHIFT,
+ .min_field_value = 1,
+ .matches = has_cpuid_feature,
+ },
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
--
2.19.1
Set VM_LEVEL flags in some tlb_flush functions.
The relevant functions are:
tlb_flush in asm/tlb.h
get_clear_flush and clear_flush in mm/hugetlbpage.c
flush_pmd|pud_tlb_range in asm-generic/pgtable.h
do_huge_pmd_numa_page and move_huge_pmd in mm/huge_memory.c
Signed-off-by: Zhenyu Ye <[email protected]>
---
arch/arm64/include/asm/tlb.h | 12 ++++++++++++
arch/arm64/mm/hugetlbpage.c | 4 ++--
include/asm-generic/pgtable.h | 16 ++++++++++++++--
mm/huge_memory.c | 8 +++++++-
4 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index b76df828e6b7..77fe942b30b6 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -27,6 +27,18 @@ static inline void tlb_flush(struct mmu_gather *tlb)
bool last_level = !tlb->freed_tables;
unsigned long stride = tlb_get_unmap_size(tlb);
+ /*
+ * mmu_gather tracks which levels of the page tables
+ * have been cleared, so we can use this info to set
+ * vma.vm_flags.
+ */
+ if (tlb->cleared_ptes)
+ vma.vm_flags |= VM_LEVEL_PTE;
+ else if (tlb->cleared_pmds)
+ vma.vm_flags |= VM_LEVEL_PMD;
+ else if (tlb->cleared_puds)
+ vma.vm_flags |= VM_LEVEL_PUD;
+
/*
* If we're tearing down the address space then we only care about
* invalidating the walk-cache, since the ASID allocator won't
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index bbeb6a5a6ba6..c35a1bd06bd0 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -140,7 +140,7 @@ static pte_t get_clear_flush(struct mm_struct *mm,
}
if (valid) {
- struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+ struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_LEVEL_PTE);
flush_tlb_range(&vma, saddr, addr);
}
return orig_pte;
@@ -161,7 +161,7 @@ static void clear_flush(struct mm_struct *mm,
unsigned long pgsize,
unsigned long ncontig)
{
- struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+ struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_LEVEL_PTE);
unsigned long i, saddr = addr;
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index e2e2bef07dd2..391e704faf7a 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1160,8 +1160,20 @@ static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
* invalidate the entire TLB which is not desitable.
* e.g. see arch/arc: flush_pmd_tlb_range
*/
-#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
-#define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#define flush_pmd_tlb_range(vma, addr, end) \
+ do { \
+ vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PTE); \
+ vma->vm_flags |= VM_LEVEL_PMD; \
+ flush_tlb_range(vma, addr, end); \
+ } while (0)
+
+#define flush_pud_tlb_range(vma, addr, end) \
+ do { \
+ vma->vm_flags &= ~(VM_LEVEL_PMD | VM_LEVEL_PTE); \
+ vma->vm_flags |= VM_LEVEL_PUD; \
+ flush_tlb_range(vma, addr, end); \
+ } while (0)
+
#else
#define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end) BUILD_BUG()
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 24ad53b4dfc0..9a78b8d865f0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1646,6 +1646,8 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
* mapping or not. Hence use the tlb range variant
*/
if (mm_tlb_flush_pending(vma->vm_mm)) {
+ vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PTE);
+ vma->vm_flags |= VM_LEVEL_PMD;
flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
/*
* change_huge_pmd() released the pmd lock before
@@ -1917,8 +1919,12 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
}
pmd = move_soft_dirty_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
- if (force_flush)
+ if (force_flush) {
+ vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PTE);
+ vma->vm_flags |= VM_LEVEL_PMD;
flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+ }
+
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
spin_unlock(old_ptl);
--
2.19.1
Add VM_LEVEL_[PUD|PMD|PTE] to vm_flags to indicate which level of
the page tables the vma is mapped at. These flags can be used to
reduce the cost of TLB invalidation.
These should be common flags for all architectures; however, the
flags are currently only available on 64-bit systems, because the
lower-order flag bits are fully used.
These flags are currently only used by the arm64 architecture. See
the next patch.
Signed-off-by: Zhenyu Ye <[email protected]>
---
include/linux/mm.h | 10 ++++++++++
include/trace/events/mmflags.h | 15 ++++++++++++++-
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c54fb96cb1e6..3ff16ffa5e83 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -313,6 +313,16 @@ extern unsigned int kobjsize(const void *objp);
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
+#ifdef CONFIG_64BIT
+# define VM_LEVEL_PUD BIT(37) /* vma is in pud-level of page table */
+# define VM_LEVEL_PMD BIT(38) /* vma is in pmd-level of page table */
+# define VM_LEVEL_PTE BIT(39) /* vma is in pte-level of page table */
+#else
+# define VM_LEVEL_PUD 0
+# define VM_LEVEL_PMD 0
+# define VM_LEVEL_PTE 0
+#endif /* CONFIG_64BIT */
+
#if defined(CONFIG_X86)
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
#elif defined(CONFIG_PPC)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a1675d43777e..9f13cfa96f9f 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -130,6 +130,16 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" )
#define IF_HAVE_VM_SOFTDIRTY(flag,name)
#endif
+#ifdef CONFIG_64BIT
+#define IF_HAVE_VM_LEVEL_PUD(flag,name) {flag, name}
+#define IF_HAVE_VM_LEVEL_PMD(flag,name) {flag, name}
+#define IF_HAVE_VM_LEVEL_PTE(flag,name) {flag, name}
+#else
+#define IF_HAVE_VM_LEVEL_PUD(flag,name)
+#define IF_HAVE_VM_LEVEL_PMD(flag,name)
+#define IF_HAVE_VM_LEVEL_PTE(flag,name)
+#endif
+
#define __def_vmaflag_names \
{VM_READ, "read" }, \
{VM_WRITE, "write" }, \
@@ -161,7 +171,10 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
{VM_MIXEDMAP, "mixedmap" }, \
{VM_HUGEPAGE, "hugepage" }, \
{VM_NOHUGEPAGE, "nohugepage" }, \
- {VM_MERGEABLE, "mergeable" } \
+ {VM_MERGEABLE, "mergeable" }, \
+IF_HAVE_VM_LEVEL_PUD(VM_LEVEL_PUD, "pud-level" ), \
+IF_HAVE_VM_LEVEL_PMD(VM_LEVEL_PMD, "pmd-level" ), \
+IF_HAVE_VM_LEVEL_PTE(VM_LEVEL_PTE, "pte-level" ) \
#define show_vma_flags(flags) \
(flags) ? __print_flags(flags, "|", \
--
2.19.1
On Tue, 24 Mar 2020 21:45:31 +0800
Zhenyu Ye <[email protected]> wrote:
> Add a level-hinted parameter to __tlbi_user, which is only used
> when ARMv8.4-TTL is detected.
>
> ARMv8.4-TTL provides a TTL field in the TLBI instructions to
> indicate the level of the translation table walk holding the leaf
> entry for the address that is being invalidated.
>
> This patch sets the default level value to 0.
>
> Signed-off-by: Zhenyu Ye <[email protected]>
> ---
> arch/arm64/include/asm/tlbflush.h | 42 ++++++++++++++++++++++++++-----
> 1 file changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index a3f70778a325..d141c080e494 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -89,6 +89,36 @@
> __tlbi(op, arg); \
> } while(0)
>
> +#define __tlbi_user_level(op, addr, level) \
> + do { \
> + u64 arg = addr; \
> + \
> + if (!arm64_kernel_unmapped_at_el0()) \
> + break; \
> + \
> + if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
> + level) { \
> + u64 ttl = level; \
> + \
> + switch (PAGE_SIZE) { \
> + case SZ_4K: \
> + ttl |= 1 << 2; \
> + break; \
> + case SZ_16K: \
> + ttl |= 2 << 2; \
> + break; \
> + case SZ_64K: \
> + ttl |= 3 << 2; \
> + break; \
> + } \
> + \
> + arg &= ~TLBI_TTL_MASK; \
> + arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
> + } \
> + \
> + __tlbi(op, (arg) | USER_ASID_FLAG); \
> + } while (0)
> +
Isn't this just:
#define __tlbi_user_level(op, addr, level) \
do { \
if (!arm64_kernel_unmapped_at_el0()) \
break; \
\
__tlbi_level(op, addr | USER_ASID_FLAG, level); \
} while (0)
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On Tue, 24 Mar 2020 21:45:33 +0800
Zhenyu Ye <[email protected]> wrote:
> This patch uses the VM_LEVEL flags in vma->vm_flags to set the
> TTL field in the TLBI instruction.
>
> Signed-off-by: Zhenyu Ye <[email protected]>
> ---
> arch/arm64/include/asm/mmu.h | 2 ++
> arch/arm64/include/asm/tlbflush.h | 14 ++++++++------
> arch/arm64/mm/mmu.c | 14 ++++++++++++++
> 3 files changed, 24 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> index d79ce6df9e12..a8b8824a7405 100644
> --- a/arch/arm64/include/asm/mmu.h
> +++ b/arch/arm64/include/asm/mmu.h
> @@ -86,6 +86,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
> extern void mark_linear_text_alias_ro(void);
> extern bool kaslr_requires_kpti(void);
> +extern unsigned int get_vma_level(struct vm_area_struct *vma);
> +
>
> #define INIT_MM_CONTEXT(name) \
> .pgd = init_pg_dir,
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index d141c080e494..93bb09fdfafd 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -218,10 +218,11 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
> unsigned long uaddr)
> {
> unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
> + unsigned int level = get_vma_level(vma);
>
> dsb(ishst);
> - __tlbi_level(vale1is, addr, 0);
> - __tlbi_user_level(vale1is, addr, 0);
> + __tlbi_level(vale1is, addr, level);
> + __tlbi_user_level(vale1is, addr, level);
> }
>
> static inline void flush_tlb_page(struct vm_area_struct *vma,
> @@ -242,6 +243,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
> unsigned long stride, bool last_level)
> {
> unsigned long asid = ASID(vma->vm_mm);
> + unsigned int level = get_vma_level(vma);
> unsigned long addr;
>
> start = round_down(start, stride);
> @@ -261,11 +263,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
> dsb(ishst);
> for (addr = start; addr < end; addr += stride) {
> if (last_level) {
> - __tlbi_level(vale1is, addr, 0);
> - __tlbi_user_level(vale1is, addr, 0);
> + __tlbi_level(vale1is, addr, level);
> + __tlbi_user_level(vale1is, addr, level);
> } else {
> - __tlbi_level(vae1is, addr, 0);
> - __tlbi_user_level(vae1is, addr, 0);
> + __tlbi_level(vae1is, addr, level);
> + __tlbi_user_level(vae1is, addr, level);
> }
> }
> dsb(ish);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 128f70852bf3..e6a1221cd86b 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -60,6 +60,20 @@ static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
>
> static DEFINE_SPINLOCK(swapper_pgdir_lock);
>
> +inline unsigned int get_vma_level(struct vm_area_struct *vma)
> +{
> + unsigned int level = 0;
> + if (vma->vm_flags & VM_LEVEL_PUD)
> + level = 1;
> + else if (vma->vm_flags & VM_LEVEL_PMD)
> + level = 2;
> + else if (vma->vm_flags & VM_LEVEL_PTE)
> + level = 3;
> +
> + vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PMD | VM_LEVEL_PTE);
> + return level;
> +}
> +
> void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
> {
> pgd_t *fixmap_pgdp;
If feels bizarre a TLBI is now a destructive operation: you've lost the
flags by having cleared them. Even if that's not really a problem in
practice (you issue TLBI because you've unmapped the VMA), it remains
that the act of invalidating TLBs isn't expected to change a kernel
structure (and I'm not even thinking about potential races here).
If anything, I feel this should be based around the mmu_gather
structure, which already tracks the right level of information and
additionally knows about the P4D level which is missing in your patches
(even if arm64 is so far limited to 4 levels).
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On Tue, Mar 24, 2020 at 09:45:28PM +0800, Zhenyu Ye wrote:
> In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
> feature allows TLB invalidation instructions to be issued with a
> level hint, allowing for quicker invalidation. This series provides
> support for this feature.
>
> Patches 1 and 2 were provided by Marc in his NV series[1] patches,
> which detect the TTL feature and add the __tlbi_level interface.
I really hate how it makes vma->vm_flags more important for tlbi.
On Tue, 24 Mar 2020 21:45:32 +0800
Zhenyu Ye <[email protected]> wrote:
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -130,6 +130,16 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" )
> #define IF_HAVE_VM_SOFTDIRTY(flag,name)
> #endif
>
> +#ifdef CONFIG_64BIT
> +#define IF_HAVE_VM_LEVEL_PUD(flag,name) {flag, name}
> +#define IF_HAVE_VM_LEVEL_PMD(flag,name) {flag, name}
> +#define IF_HAVE_VM_LEVEL_PTE(flag,name) {flag, name}
> +#else
> +#define IF_HAVE_VM_LEVEL_PUD(flag,name)
> +#define IF_HAVE_VM_LEVEL_PMD(flag,name)
> +#define IF_HAVE_VM_LEVEL_PTE(flag,name)
> +#endif
> +
> #define __def_vmaflag_names \
> {VM_READ, "read" }, \
> {VM_WRITE, "write" }, \
> @@ -161,7 +171,10 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> {VM_MIXEDMAP, "mixedmap" }, \
> {VM_HUGEPAGE, "hugepage" }, \
> {VM_NOHUGEPAGE, "nohugepage" }, \
> - {VM_MERGEABLE, "mergeable" } \
> + {VM_MERGEABLE, "mergeable" }, \
> +IF_HAVE_VM_LEVEL_PUD(VM_LEVEL_PUD, "pud-level" ), \
> +IF_HAVE_VM_LEVEL_PMD(VM_LEVEL_PMD, "pmd-level" ), \
> +IF_HAVE_VM_LEVEL_PTE(VM_LEVEL_PTE, "pte-level" ) \
>
Have you tested this on 32bit? It looks like you'll get empty commas there.
Perhaps the defines need to be:
#ifdef CONFIG_64BIT
#define IF_HAVE_VM_LEVEL_PUD(flag,name) {flag, name},
#define IF_HAVE_VM_LEVEL_PMD(flag,name) {flag, name},
#define IF_HAVE_VM_LEVEL_PTE(flag,name) {flag, name}
#else
#define IF_HAVE_VM_LEVEL_PUD(flag,name)
#define IF_HAVE_VM_LEVEL_PMD(flag,name)
#define IF_HAVE_VM_LEVEL_PTE(flag,name)
#endif
And leave out the commas in the list.
-- Steve
Hi Steve,
On Wed, 25 Mar 2020 2:45, Steve wrote:
> On Tue, 24 Mar 2020 21:45:32 +0800
> Zhenyu Ye <[email protected]> wrote:
>
> > --- a/include/trace/events/mmflags.h
> > +++ b/include/trace/events/mmflags.h
> > @@ -130,6 +130,16 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" )
> > #define IF_HAVE_VM_SOFTDIRTY(flag,name)
> > #endif
> >
> > +#ifdef CONFIG_64BIT
> > +#define IF_HAVE_VM_LEVEL_PUD(flag,name) {flag, name}
> > +#define IF_HAVE_VM_LEVEL_PMD(flag,name) {flag, name}
> > +#define IF_HAVE_VM_LEVEL_PTE(flag,name) {flag, name}
> > +#else
> > +#define IF_HAVE_VM_LEVEL_PUD(flag,name)
> > +#define IF_HAVE_VM_LEVEL_PMD(flag,name)
> > +#define IF_HAVE_VM_LEVEL_PTE(flag,name)
> > +#endif
> > +
> > #define __def_vmaflag_names \
> > {VM_READ, "read" }, \
> > {VM_WRITE, "write" }, \
> > @@ -161,7 +171,10 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> > {VM_MIXEDMAP, "mixedmap" }, \
> > {VM_HUGEPAGE, "hugepage" }, \
> > {VM_NOHUGEPAGE, "nohugepage" }, \
> > - {VM_MERGEABLE, "mergeable" } \
> > + {VM_MERGEABLE, "mergeable" }, \
> > +IF_HAVE_VM_LEVEL_PUD(VM_LEVEL_PUD, "pud-level" ), \
> > +IF_HAVE_VM_LEVEL_PMD(VM_LEVEL_PMD, "pmd-level" ), \
> > +IF_HAVE_VM_LEVEL_PTE(VM_LEVEL_PTE, "pte-level" ) \
> >
>
> Have you tested this on 32bit? It looks like you'll get empty commas there.
> Perhaps the defines need to be:
>
> #ifdef CONFIG_64BIT
> #define IF_HAVE_VM_LEVEL_PUD(flag,name) {flag, name},
> #define IF_HAVE_VM_LEVEL_PMD(flag,name) {flag, name},
> #define IF_HAVE_VM_LEVEL_PTE(flag,name) {flag, name}
> #else
> #define IF_HAVE_VM_LEVEL_PUD(flag,name)
> #define IF_HAVE_VM_LEVEL_PMD(flag,name)
> #define IF_HAVE_VM_LEVEL_PTE(flag,name)
> #endif
>
> And leave out the commas in the list.
>
> -- Steve
Thanks for your review. I will fix this in the next version, if I can
still use vm_flags at that time :).
Zhenyu
Hi Marc,
On 2020/3/24 22:19, Marc Zyngier wrote:
> On Tue, 24 Mar 2020 21:45:31 +0800
> Zhenyu Ye <[email protected]> wrote:
>
>> Add a level-hinted parameter to __tlbi_user, which is only used
>> when ARMv8.4-TTL is detected.
>>
>> ARMv8.4-TTL provides a TTL field in the TLBI instructions to
>> indicate the level of the translation table walk holding the leaf
>> entry for the address that is being invalidated.
>>
>> This patch sets the default level value to 0.
>>
>> Signed-off-by: Zhenyu Ye <[email protected]>
>> ---
>> arch/arm64/include/asm/tlbflush.h | 42 ++++++++++++++++++++++++++-----
>> 1 file changed, 36 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index a3f70778a325..d141c080e494 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -89,6 +89,36 @@
>> __tlbi(op, arg); \
>> } while(0)
>>
>> +#define __tlbi_user_level(op, addr, level) \
>> + do { \
>> + u64 arg = addr; \
>> + \
>> + if (!arm64_kernel_unmapped_at_el0()) \
>> + break; \
>> + \
>> + if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
>> + level) { \
>> + u64 ttl = level; \
>> + \
>> + switch (PAGE_SIZE) { \
>> + case SZ_4K: \
>> + ttl |= 1 << 2; \
>> + break; \
>> + case SZ_16K: \
>> + ttl |= 2 << 2; \
>> + break; \
>> + case SZ_64K: \
>> + ttl |= 3 << 2; \
>> + break; \
>> + } \
>> + \
>> + arg &= ~TLBI_TTL_MASK; \
>> + arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
>> + } \
>> + \
>> + __tlbi(op, (arg) | USER_ASID_FLAG); \
>> + } while (0)
>> +
>
> Isn't this just:
>
> #define __tlbi_user_level(op, addr, level) \
> do { \
> if (!arm64_kernel_unmapped_at_el0()) \
> break; \
> \
> __tlbi_level(op, addr | USER_ASID_FLAG, level); \
> } while (0)
>
> Thanks,
>
> M.
>
Yeah, your code is much clearer! I will take it in the next version. ;-)
Thanks,
zhenyu
Hi Peter,
On 2020/3/24 23:01, Peter Zijlstra wrote:
> On Tue, Mar 24, 2020 at 09:45:28PM +0800, Zhenyu Ye wrote:
>> In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
>> feature allows TLB invalidation instructions to be issued with a
>> level hint, allowing for quicker invalidation. This series provides
>> support for this feature.
>>
>> Patches 1 and 2 were provided by Marc in his NV series[1] patches,
>> which detect the TTL feature and add the __tlbi_level interface.
>
> I really hate how it makes vma->vm_flags more important for tlbi.
>
Thanks for your review.
The tlbi interfaces only have two parameters: vma and addr. If we
try not to use vma->vm_flags, we may have to add a parameter to
some of these interfaces (such as flush_tlb_range), which are
common to all architectures.
I'm not sure whether this is feasible, because this feature is
currently only supported by arm64.
Thanks,
Zhenyu
Hi Marc,
Thanks for your review.
On 2020/3/24 22:45, Marc Zyngier wrote:
> On Tue, 24 Mar 2020 21:45:33 +0800
> Zhenyu Ye <[email protected]> wrote:
>
>> This patch uses the VM_LEVEL flags in vma->vm_flags to set the
>> TTL field in the TLBI instruction.
>>
>> Signed-off-by: Zhenyu Ye <[email protected]>
>> ---
>> arch/arm64/include/asm/mmu.h | 2 ++
>> arch/arm64/include/asm/tlbflush.h | 14 ++++++++------
>> arch/arm64/mm/mmu.c | 14 ++++++++++++++
>> 3 files changed, 24 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
>> index d79ce6df9e12..a8b8824a7405 100644
>> --- a/arch/arm64/include/asm/mmu.h
>> +++ b/arch/arm64/include/asm/mmu.h
>> @@ -86,6 +86,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
>> extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
>> extern void mark_linear_text_alias_ro(void);
>> extern bool kaslr_requires_kpti(void);
>> +extern unsigned int get_vma_level(struct vm_area_struct *vma);
>> +
>>
>> #define INIT_MM_CONTEXT(name) \
>> .pgd = init_pg_dir,
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index d141c080e494..93bb09fdfafd 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -218,10 +218,11 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
>> unsigned long uaddr)
>> {
>> unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
>> + unsigned int level = get_vma_level(vma);
>>
>> dsb(ishst);
>> - __tlbi_level(vale1is, addr, 0);
>> - __tlbi_user_level(vale1is, addr, 0);
>> + __tlbi_level(vale1is, addr, level);
>> + __tlbi_user_level(vale1is, addr, level);
>> }
>>
>> static inline void flush_tlb_page(struct vm_area_struct *vma,
>> @@ -242,6 +243,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> unsigned long stride, bool last_level)
>> {
>> unsigned long asid = ASID(vma->vm_mm);
>> + unsigned int level = get_vma_level(vma);
>> unsigned long addr;
>>
>> start = round_down(start, stride);
>> @@ -261,11 +263,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> dsb(ishst);
>> for (addr = start; addr < end; addr += stride) {
>> if (last_level) {
>> - __tlbi_level(vale1is, addr, 0);
>> - __tlbi_user_level(vale1is, addr, 0);
>> + __tlbi_level(vale1is, addr, level);
>> + __tlbi_user_level(vale1is, addr, level);
>> } else {
>> - __tlbi_level(vae1is, addr, 0);
>> - __tlbi_user_level(vae1is, addr, 0);
>> + __tlbi_level(vae1is, addr, level);
>> + __tlbi_user_level(vae1is, addr, level);
>> }
>> }
>> dsb(ish);
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 128f70852bf3..e6a1221cd86b 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -60,6 +60,20 @@ static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
>>
>> static DEFINE_SPINLOCK(swapper_pgdir_lock);
>>
>> +inline unsigned int get_vma_level(struct vm_area_struct *vma)
>> +{
>> + unsigned int level = 0;
>> + if (vma->vm_flags & VM_LEVEL_PUD)
>> + level = 1;
>> + else if (vma->vm_flags & VM_LEVEL_PMD)
>> + level = 2;
>> + else if (vma->vm_flags & VM_LEVEL_PTE)
>> + level = 3;
>> +
>> + vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PMD | VM_LEVEL_PTE);
>> + return level;
>> +}
>> +
>> void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
>> {
>> pgd_t *fixmap_pgdp;
>
>
> If feels bizarre a TLBI is now a destructive operation: you've lost the
> flags by having cleared them. Even if that's not really a problem in
> practice (you issue TLBI because you've unmapped the VMA), it remains
> that the act of invalidating TLBs isn't expected to change a kernel
> structure (and I'm not even thinking about potential races here).
I cleared vm_flags here just out of caution, because every TLBI flush
path should set vm_flags itself. As far as I know, the TLB level of a
vma will only be changed by transparent hugepage collapse and merge
while the page is not mapped, so there should be no potential races.
But you are right that TLBI shouldn't change a kernel structure.
I will remove the clearing and add some notes here.
>
> If anything, I feel this should be based around the mmu_gather
> structure, which already tracks the right level of information and
> additionally knows about the P4D level which is missing in your patches
> (even if arm64 is so far limited to 4 levels).
>
> Thanks,
>
> M.
>
The mmu_gather structure has tracked the level information, but we can
only use that info in the tlb_flush interface. If we want to use it in
flush_tlb_range, we may have to add a parameter to that interface,
such as:
flush_tlb_range(vma, start, end, flags);
However, flush_tlb_range is a common interface to all architectures,
and I'm not sure whether this is feasible because this feature is
currently only supported by arm64.
Or can we ignore flush_tlb_range and only set the TTL field in
tlb_flush? Waiting for your suggestion...
For the P4D level, the TTL field is limited to 4 bits (2 for the
translation granule and 2 for the page table level), so we can no
longer afford more levels :).
On Wed, Mar 25, 2020 at 12:49:45PM +0800, Zhenyu Ye wrote:
> Hi Peter,
>
> On 2020/3/24 23:01, Peter Zijlstra wrote:
> > On Tue, Mar 24, 2020 at 09:45:28PM +0800, Zhenyu Ye wrote:
> >> In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
> >> feature allows TLB invalidation instructions to be issued with a
> >> level hint, allowing for quicker invalidation. This series provides
> >> support for this feature.
> >>
> >> Patches 1 and 2 were provided by Marc in his NV series[1] patches,
> >> which detect the TTL feature and add the __tlbi_level interface.
> >
> > I really hate how it makes vma->vm_flags more important for tlbi.
> >
>
> Thanks for your review.
>
> The tlbi interfaces only have two parameters: vma and addr. If we
> try not to use vma->vm_flags, we may have to add a parameter to
> some of these interfaces (such as flush_tlb_range), which are
> common to all architectures.
>
> I'm not sure whether this is feasible, because this feature is
> currently only supported by arm64.
Power (p9-radix) also has level dependent invalidation instructions, so
at the very least you can hook them up as well.
On 2020-03-25 08:00, Zhenyu Ye wrote:
> Hi Marc,
>
> Thanks for your review.
>
> On 2020/3/24 22:45, Marc Zyngier wrote:
>> On Tue, 24 Mar 2020 21:45:33 +0800
>> Zhenyu Ye <[email protected]> wrote:
>>
>>> This patch uses the VM_LEVEL flags in vma->vm_flags to set the
>>> TTL field in the TLBI instruction.
>>>
>>> Signed-off-by: Zhenyu Ye <[email protected]>
>>> ---
>>> arch/arm64/include/asm/mmu.h | 2 ++
>>> arch/arm64/include/asm/tlbflush.h | 14 ++++++++------
>>> arch/arm64/mm/mmu.c | 14 ++++++++++++++
>>> 3 files changed, 24 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
>>> index d79ce6df9e12..a8b8824a7405 100644
>>> --- a/arch/arm64/include/asm/mmu.h
>>> +++ b/arch/arm64/include/asm/mmu.h
>>> @@ -86,6 +86,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
>>> extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
>>> extern void mark_linear_text_alias_ro(void);
>>> extern bool kaslr_requires_kpti(void);
>>> +extern unsigned int get_vma_level(struct vm_area_struct *vma);
>>> +
>>>
>>> #define INIT_MM_CONTEXT(name) \
>>> .pgd = init_pg_dir,
>>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>>> index d141c080e494..93bb09fdfafd 100644
>>> --- a/arch/arm64/include/asm/tlbflush.h
>>> +++ b/arch/arm64/include/asm/tlbflush.h
>>> @@ -218,10 +218,11 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
>>> unsigned long uaddr)
>>> {
>>> unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
>>> + unsigned int level = get_vma_level(vma);
>>>
>>> dsb(ishst);
>>> - __tlbi_level(vale1is, addr, 0);
>>> - __tlbi_user_level(vale1is, addr, 0);
>>> + __tlbi_level(vale1is, addr, level);
>>> + __tlbi_user_level(vale1is, addr, level);
>>> }
>>>
>>> static inline void flush_tlb_page(struct vm_area_struct *vma,
>>> @@ -242,6 +243,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>> unsigned long stride, bool last_level)
>>> {
>>> unsigned long asid = ASID(vma->vm_mm);
>>> + unsigned int level = get_vma_level(vma);
>>> unsigned long addr;
>>>
>>> start = round_down(start, stride);
>>> @@ -261,11 +263,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>> dsb(ishst);
>>> for (addr = start; addr < end; addr += stride) {
>>> if (last_level) {
>>> - __tlbi_level(vale1is, addr, 0);
>>> - __tlbi_user_level(vale1is, addr, 0);
>>> + __tlbi_level(vale1is, addr, level);
>>> + __tlbi_user_level(vale1is, addr, level);
>>> } else {
>>> - __tlbi_level(vae1is, addr, 0);
>>> - __tlbi_user_level(vae1is, addr, 0);
>>> + __tlbi_level(vae1is, addr, level);
>>> + __tlbi_user_level(vae1is, addr, level);
>>> }
>>> }
>>> dsb(ish);
>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>> index 128f70852bf3..e6a1221cd86b 100644
>>> --- a/arch/arm64/mm/mmu.c
>>> +++ b/arch/arm64/mm/mmu.c
>>> @@ -60,6 +60,20 @@ static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
>>>
>>> static DEFINE_SPINLOCK(swapper_pgdir_lock);
>>>
>>> +inline unsigned int get_vma_level(struct vm_area_struct *vma)
>>> +{
>>> + unsigned int level = 0;
>>> + if (vma->vm_flags & VM_LEVEL_PUD)
>>> + level = 1;
>>> + else if (vma->vm_flags & VM_LEVEL_PMD)
>>> + level = 2;
>>> + else if (vma->vm_flags & VM_LEVEL_PTE)
>>> + level = 3;
>>> +
>>> + vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PMD | VM_LEVEL_PTE);
>>> + return level;
>>> +}
>>> +
>>> void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
>>> {
>>> pgd_t *fixmap_pgdp;
>>
>>
>> If feels bizarre a TLBI is now a destructive operation: you've lost
>> the
>> flags by having cleared them. Even if that's not really a problem in
>> practice (you issue TLBI because you've unmapped the VMA), it remains
>> that the act of invalidating TLBs isn't expected to change a kernel
>> structure (and I'm not even thinking about potential races here).
>
> I cleared vm_flags here just out of caution, because every TLBI flush
> path should set vm_flags itself. As far as I know, the TLB level of a
> vma will only be changed by transparent hugepage collapse and merge
> while the page is not mapped, so there should be no potential races.
>
> But you are right that TLBI shouldn't change a kernel structure.
> I will remove the clearing and add some notes here.
More than that. You are changing the VMA flags at TLBI time already,
when calling get_vma_level(). That is already unacceptable -- I don't
think (and Peter will certainly correct me if I'm wrong) that you
are allowed to change the flags on that code path, as you probably
don't hold the write_lock.
>> If anything, I feel this should be based around the mmu_gather
>> structure, which already tracks the right level of information and
>> additionally knows about the P4D level which is missing in your patches
>> (even if arm64 is so far limited to 4 levels).
>>
>> Thanks,
>>
>> M.
>>
>
> mmu_gather structure has tracked the level information, but we can only
> use the info in the tlb_flush interface. If we want to use the info in
> flush_tlb_range, we may should have to add a parameter to this
> interface,
> such as:
>
> flush_tlb_range(vma, start, end, flags);
>
> However, the flush_tlb_range is a common interface to all
> architectures,
> I'm not sure if this is feasible because this feature is only supported
> by ARM64 currently.
You could always build an on-stack mmu_gather structure and pass it down
to the TLB invalidation helpers.
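Untested sketch of what I mean, just to illustrate the shape (the
helper name is made up, and nothing here comes from a real patch):

/*
 * Build an on-stack mmu_gather so the level information travels with
 * the invalidation request instead of being stashed in vma->vm_flags.
 * tlb_finish_mmu() then performs the actual flush based on the
 * cleared_* bits and the recorded range.
 */
static void flush_pmd_tlb_range_gathered(struct vm_area_struct *vma,
					 unsigned long start,
					 unsigned long end)
{
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, vma->vm_mm, start, end);
	__tlb_adjust_range(&tlb, start, end - start);	/* record the range */
	tlb.cleared_pmds = 1;				/* leaf entries are PMDs */
	tlb_finish_mmu(&tlb, start, end);
}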
And frankly, you are not going to be able to fit such a change in the
way Linux deals with TLBs by adding hacks at the periphery. You'll need
to change some of the core abstractions.
Finally, as Peter mentioned separately, there is Power9 which has similar
instructions, and could make use of it too. So that's yet another reason
to stop hacking at the arch level.
>
> Or can we ignore flush_tlb_range and only set the TTL field in
> tlb_flush? Waiting for your suggestion...
You could, but you could also ignore TTL altogether (what's the point
in only having half of it?). See my suggestion above.
> For the P4D level, the TTL field is limited to 4 bits (2 for the
> translation granule and 2 for the page table level), so we can no
> longer afford more levels :).
You clearly didn't read the bit that said "even if arm64 is so far limited
to 4 levels". But again, this is Linux, a multi-architecture kernel, and
not an arm64-special. Changes you make have to work for all architectures,
and be extensible enough for future changes.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Hi Zhenyu,
On 3/24/20 1:45 PM, Zhenyu Ye wrote:
> In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
> feature allows TLB invalidation instructions to be issued with a
> level hint, allowing for quicker invalidation. This series provides
> support for this feature.
>
> Patches 1 and 2 were provided by Marc in his NV series[1] patches,
> which detect the TTL feature and add the __tlbi_level interface.
How does this interact with THP?
(I don't see anything on that in the series.)
With THP, there is no one answer to the size of mapping in a VMA.
This is a problem because the arm-arm has in "Translation table level
hints" in D5.10.2 of DDI0487E.a:
| If an incorrect value for the entry being invalidated by the
| instruction is specified in the TTL field, then no entries are
| required by the architecture to be invalidated from the TLB.
If we get it wrong, no TLB maintenance occurs!
Unless THP leaves its fingerprints on the vma, I think you can only do
this for VMA types that THP can't mess with. (see
transparent_hugepage_enabled())
Thanks,
James
On Wed, Mar 25, 2020 at 04:15:58PM +0000, James Morse wrote:
> Hi Zhenyu,
>
> On 3/24/20 1:45 PM, Zhenyu Ye wrote:
> > In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
> > feature allows TLB invalidation instructions to be issued with a
> > level hint, allowing for quicker invalidation. This series provides
> > support for this feature.
> >
> > Patches 1 and 2 were provided by Marc in his NV series[1] patches,
> > which detect the TTL feature and add the __tlbi_level interface.
>
> How does this interact with THP?
> (I don't see anything on that in the series.)
>
> With THP, there is no one answer to the size of mapping in a VMA.
> This is a problem because the arm-arm has in "Translation table level
> hints" in D5.10.2 of DDI0487E.a:
> | If an incorrect value for the entry being invalidated by the
> | instruction is specified in the TTL field, then no entries are
> | required by the architecture to be invalidated from the TLB.
>
> If we get it wrong, no TLB maintenance occurs!
>
> Unless THP leaves its fingerprints on the vma, I think you can only do
> this for VMA types that THP can't mess with. (see
> transparent_hugepage_enabled())
The conventional way to deal with that is to issue the TLBI for all
possible sizes.
Power9 has all this, please look there.
Hi James,
On 2020/3/26 0:15, James Morse wrote:
> Hi Zhenyu,
>
> On 3/24/20 1:45 PM, Zhenyu Ye wrote:
>> In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
>> feature allows TLB invalidation instructions to be issued with a
>> level hint, allowing for quicker invalidation. This series provides
>> support for this feature.
>>
>> Patches 1 and 2 were provided by Marc in his NV series[1] patches,
>> which detect the TTL feature and add the __tlbi_level interface.
>
> How does this interact with THP?
> (I don't see anything on that in the series.)
>
> With THP, there is no one answer to the size of mapping in a VMA.
> This is a problem because the arm-arm has in "Translation table level
> hints" in D5.10.2 of DDI0487E.a:
> | If an incorrect value for the entry being invalidated by the
> | instruction is specified in the TTL field, then no entries are
> | required by the architecture to be invalidated from the TLB.
>
> If we get it wrong, no TLB maintenance occurs!
>
Thanks for your review. With THP, we should update the TTL value
after page collapse and merge. If we are not sure what it should be,
we can set it to 0 to avoid the "no TLB maintenance occurs" problem.
Table D5-53 in DDI0487E.a says:
| when TTL[1:0] is 0b00:
| This value is reserved, and hardware should treat this as if TTL[3:2] is 0b00
| when TTL[3:2] is 0b00:
| Hardware must assume that the entry can be from any level.
> Unless THP leaves its fingerprints on the vma, I think you can only do
> this for VMA types that THP can't mess with. (see
> transparent_hugepage_enabled())
>
I will try to add struct mmu_gather to the TLBI interfaces, which has
enough info to track the TLB level. See the next patch version!
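Roughly, the level could then be derived from the mmu_gather tracking
bits, something like this (only a sketch of the idea, the helper name
is provisional):

/*
 * Only return a level when exactly one page table level has been
 * cleared; returning 0 tells the hardware the entry can be from
 * any level, which is always safe.
 */
static inline int tlb_get_level(struct mmu_gather *tlb)
{
	if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
				   tlb->cleared_puds ||
				   tlb->cleared_p4ds))
		return 3;

	if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
				   tlb->cleared_puds ||
				   tlb->cleared_p4ds))
		return 2;

	if (tlb->cleared_puds && !(tlb->cleared_ptes ||
				   tlb->cleared_pmds ||
				   tlb->cleared_p4ds))
		return 1;

	return 0;
}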
Thanks,
Zhenyu
Hi Marc,
On 2020/3/25 22:13, Marc Zyngier wrote:
>>>>
>>>> +inline unsigned int get_vma_level(struct vm_area_struct *vma)
>>>> +{
>>>> + unsigned int level = 0;
>>>> + if (vma->vm_flags & VM_LEVEL_PUD)
>>>> + level = 1;
>>>> + else if (vma->vm_flags & VM_LEVEL_PMD)
>>>> + level = 2;
>>>> + else if (vma->vm_flags & VM_LEVEL_PTE)
>>>> + level = 3;
>>>> +
>>>> + vma->vm_flags &= ~(VM_LEVEL_PUD | VM_LEVEL_PMD | VM_LEVEL_PTE);
>>>> + return level;
>>>> +}
>>>> +
>>>> void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
>>>> {
>>>> pgd_t *fixmap_pgdp;
>>>
>>>
>>> If feels bizarre a TLBI is now a destructive operation: you've lost the
>>> flags by having cleared them. Even if that's not really a problem in
>>> practice (you issue TLBI because you've unmapped the VMA), it remains
>>> that the act of invalidating TLBs isn't expected to change a kernel
>>> structure (and I'm not even thinking about potential races here).
>>
>> I cleared vm_flags here just out of caution, because every TLBI flush
>> path should set vm_flags itself. As far as I know, the TLB level of a
>> vma will only be changed by transparent hugepage collapse and merge
>> while the page is not mapped, so there should be no potential races.
>>
>> But you are right that TLBI shouldn't change a kernel structure.
>> I will remove the clearing and add some notes here.
>
> More than that. You are changing the VMA flags at TLBI time already,
> when calling get_vma_level(). That is already unacceptable -- I don't
> think (and Peter will certainly correct me if I'm wrong) that you
> are allowed to change the flags on that code path, as you probably
> don't hold the write_lock.
>
Thanks for your review. I will avoid this problem in the next version.
>>> If anything, I feel this should be based around the mmu_gather
>>> structure, which already tracks the right level of information and
>>> additionally knows about the P4D level which is missing in your patches
>>> (even if arm64 is so far limited to 4 levels).
>>>
>>> Thanks,
>>>
>>> M.
>>>
>>
>> The mmu_gather structure has tracked the level information, but we can
>> only use that info in the tlb_flush interface. If we want to use it in
>> flush_tlb_range, we may have to add a parameter to that interface,
>> such as:
>>
>> flush_tlb_range(vma, start, end, flags);
>>
>> However, flush_tlb_range is a common interface to all architectures,
>> and I'm not sure whether this is feasible because this feature is
>> currently only supported by arm64.
>
> You could always build an on-stack mmu_gather structure and pass it down
> to the TLB invalidation helpers.
>
> And frankly, you are not going to be able to fit such a change in the
> way Linux deals with TLBs by adding hacks at the periphery. You'll need
> to change some of the core abstractions.
>
> Finally, as Peter mentioned separately, there is Power9 which has similar
> instructions, and could make use of it too. So that's yet another reason
> to stop hacking at the arch level.
>
OK, I will try to add struct mmu_gather to flush_tlb_range, such as:
void flush_tlb_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long start, unsigned long end);
This will involve all architectures, so I will do it carefully.
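To limit the churn, the generic code could provide a fallback so that
architectures which do not use the level information keep their
existing implementation (just a sketch of one possible approach, the
macro name is made up and nothing has been decided here):

/*
 * Fallback for architectures that do not implement the
 * mmu_gather-aware variant: ignore the tlb argument and use the
 * existing flush_tlb_range().
 */
#ifndef flush_tlb_range_gathered
#define flush_tlb_range_gathered(tlb, vma, start, end)	\
	flush_tlb_range(vma, start, end)
#endif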
>>
>> Or can we ignore flush_tlb_range and only set the TTL field in
>> tlb_flush? Waiting for your suggestion...
>
> You could, but you could also ignore TTL altogether (what's the point
> in only having half of it?). See my suggestion above.
>
>> For the P4D level, the TTL field is limited to 4 bits (2 for the
>> translation granule and 2 for the page table level), so we can no
>> longer afford more levels :).
>
> You clearly didn't read the bit that said "even if arm64 is so far limited
> to 4 levels". But again, this is Linux, a multi-architecture kernel, and
> not an arm64-special. Changes you make have to work for all architectures,
> and be extensible enough for future changes.
>
Using struct mmu_gather to pass the TTL value will not have
this problem :).
Thanks,
Zhenyu
On 2020/3/25 21:32, Peter Zijlstra wrote:
> On Wed, Mar 25, 2020 at 12:49:45PM +0800, Zhenyu Ye wrote:
>> Hi Peter,
>>
>> On 2020/3/24 23:01, Peter Zijlstra wrote:
>>> On Tue, Mar 24, 2020 at 09:45:28PM +0800, Zhenyu Ye wrote:
>>>> In order to reduce the cost of TLB invalidation, the ARMv8.4 TTL
>>>> feature allows TLB invalidation instructions to be issued with a
>>>> level hint, allowing for quicker invalidation. This series provides
>>>> support for this feature.
>>>>
>>>> Patches 1 and 2 were provided by Marc in his NV series[1] patches,
>>>> which detect the TTL feature and add the __tlbi_level interface.
>>>
>>> I really hate how it makes vma->vm_flags more important for tlbi.
>>>
>>
>> Thanks for your review.
>>
>> The tlbi interfaces only have two parameters: vma and addr. If we
>> try not to use vma->vm_flags, we may have to add a parameter to
>> some of these interfaces (such as flush_tlb_range), which are
>> common to all architectures.
>>
>> I'm not sure whether this is feasible, because this feature is
>> currently only supported by arm64.
>
> Power (p9-radix) also has level dependent invalidation instructions, so
> at the very least you can hook them up as well.
>
>
Thanks, I will push my next version soon.
Zhenyu