From: Yunfeng Ye <[email protected]>
This patch series introduces an ASID isolation mechanism to improve CPU
isolation.

Patches 1-3: Prepare for supporting the ASID isolation mechanism.
Patch 4: Detailed description and implementation of the ASID isolation
mechanism.
Patch 5: Add a tracepoint to observe TLB flush information.
Yunfeng Ye (5):
arm64: mm: Define asid_bitmap structure for pinned_asid
arm64: mm: Extract the processing of asid_generation
arm64: mm: Use cpumask in flush_context()
arm64: mm: Support ASID isolation feature
arm64: mm: Add TLB flush trace on context switch
arch/arm64/mm/context.c | 286 ++++++++++++++++++++++++++++++++++------
1 file changed, 247 insertions(+), 39 deletions(-)
--
2.27.0
[PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid

It is clearer to use the asid_bitmap structure for pinned_asid, and we
will reuse it for the isolated ASIDs later.
No functional change.
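
For illustration only (not part of this patch), a minimal user-space
sketch of the bookkeeping that struct asid_bitmap groups together;
asid_bitmap_get()/asid_bitmap_put() are made-up helpers standing in for
the pin/unpin paths, and plain C bit operations replace the kernel
bitmap API:

  /*
   * Illustration only: the kernel uses bitmap_zalloc(), __set_bit()
   * and __clear_bit(); this sketch builds standalone.
   */
  #include <limits.h>
  #include <stdbool.h>

  #define SKETCH_BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

  struct asid_bitmap {
          unsigned long *map;     /* one bit per ASID */
          unsigned long nr;       /* number of bits currently set */
          unsigned long max;      /* upper bound, leaves slack for rollover */
  };

  /* Hypothetical helper: reserve one ASID slot, mirroring the pin path. */
  static bool asid_bitmap_get(struct asid_bitmap *b, unsigned long asid)
  {
          if (b->nr >= b->max)
                  return false;
          b->map[asid / SKETCH_BITS_PER_LONG] |= 1UL << (asid % SKETCH_BITS_PER_LONG);
          b->nr++;
          return true;
  }

  /* Hypothetical helper: release a slot, mirroring the unpin path. */
  static void asid_bitmap_put(struct asid_bitmap *b, unsigned long asid)
  {
          b->map[asid / SKETCH_BITS_PER_LONG] &= ~(1UL << (asid % SKETCH_BITS_PER_LONG));
          b->nr--;
  }

Grouping the three variables keeps the count and the map updated
together, which the later isolated-ASID bitmap can rely on as well.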
Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 38 +++++++++++++++++++++-----------------
1 file changed, 21 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e1e0dca01839..8549b5f30352 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,6 +17,12 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
+struct asid_bitmap {
+ unsigned long *map;
+ unsigned long nr;
+ unsigned long max;
+};
+
static u32 asid_bits;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
@@ -27,9 +33,7 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;
-static unsigned long max_pinned_asids;
-static unsigned long nr_pinned_asids;
-static unsigned long *pinned_asid_map;
+static struct asid_bitmap pinned_asid;
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
@@ -90,8 +94,8 @@ static void set_kpti_asid_bits(unsigned long *map)
static void set_reserved_asid_bits(void)
{
- if (pinned_asid_map)
- bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS);
+ if (pinned_asid.map)
+ bitmap_copy(asid_map, pinned_asid.map, NUM_USER_ASIDS);
else if (arm64_kernel_unmapped_at_el0())
set_kpti_asid_bits(asid_map);
else
@@ -275,7 +279,7 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
unsigned long flags;
u64 asid;
- if (!pinned_asid_map)
+ if (!pinned_asid.map)
return 0;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
@@ -285,7 +289,7 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
if (refcount_inc_not_zero(&mm->context.pinned))
goto out_unlock;
- if (nr_pinned_asids >= max_pinned_asids) {
+ if (pinned_asid.nr >= pinned_asid.max) {
asid = 0;
goto out_unlock;
}
@@ -299,8 +303,8 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
atomic64_set(&mm->context.id, asid);
}
- nr_pinned_asids++;
- __set_bit(ctxid2asid(asid), pinned_asid_map);
+ pinned_asid.nr++;
+ __set_bit(ctxid2asid(asid), pinned_asid.map);
refcount_set(&mm->context.pinned, 1);
out_unlock:
@@ -321,14 +325,14 @@ void arm64_mm_context_put(struct mm_struct *mm)
unsigned long flags;
u64 asid = atomic64_read(&mm->context.id);
- if (!pinned_asid_map)
+ if (!pinned_asid.map)
return;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
if (refcount_dec_and_test(&mm->context.pinned)) {
- __clear_bit(ctxid2asid(asid), pinned_asid_map);
- nr_pinned_asids--;
+ __clear_bit(ctxid2asid(asid), pinned_asid.map);
+ pinned_asid.nr--;
}
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
@@ -377,8 +381,8 @@ static int asids_update_limit(void)
if (arm64_kernel_unmapped_at_el0()) {
num_available_asids /= 2;
- if (pinned_asid_map)
- set_kpti_asid_bits(pinned_asid_map);
+ if (pinned_asid.map)
+ set_kpti_asid_bits(pinned_asid.map);
}
/*
* Expect allocation after rollover to fail if we don't have at least
@@ -393,7 +397,7 @@ static int asids_update_limit(void)
* even if all CPUs have a reserved ASID and the maximum number of ASIDs
* are pinned, there still is at least one empty slot in the ASID map.
*/
- max_pinned_asids = num_available_asids - num_possible_cpus() - 2;
+ pinned_asid.max = num_available_asids - num_possible_cpus() - 2;
return 0;
}
arch_initcall(asids_update_limit);
@@ -407,8 +411,8 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
- pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
- nr_pinned_asids = 0;
+ pinned_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
+ pinned_asid.nr = 0;
/*
* We cannot call set_reserved_asid_bits() here because CPU
--
2.27.0
[PATCH 5/5] arm64: mm: Add TLB flush trace on context switch

Currently there is no way to observe how many times the TLB is flushed
on context switch. Add a trace_tlb_flush() call in
check_and_switch_context(), reusing the generic tlb_flush trace event,
so that these flushes can be observed.
Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 0ea3e7485ae7..eab470a97620 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -19,6 +19,8 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
+#include <trace/events/tlb.h>
+
struct asid_bitmap {
unsigned long *map;
unsigned long nr;
@@ -60,6 +62,8 @@ static DEFINE_STATIC_KEY_FALSE(asid_isolation_enable);
#define ctxid2asid(asid) ((asid) & ~ASID_MASK)
#define asid2ctxid(asid, genid) ((asid) | (genid))
+#define TLB_FLUSH_ALL (-1)
+
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
{
@@ -416,8 +420,10 @@ void check_and_switch_context(struct mm_struct *mm)
atomic64_set(&mm->context.id, asid);
}
- if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
local_flush_tlb_all();
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ }
atomic64_set(this_cpu_ptr(&active_asids), asid);
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
--
2.27.0
[PATCH 3/5] arm64: mm: Use cpumask in flush_context()

Currently, flush_context() queues a TLB flush for all possible CPUs.
To prepare for flushing the TLB on only a subset of CPUs, introduce
asid_housekeeping_mask and use cpumask_or() instead of cpumask_setall()
when marking the pending flushes. For now the mask is initialized to
cpu_possible_mask, so there is no functional change.
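
For illustration only (not part of this patch), a user-space sketch of
the difference; the masks are modelled as plain unsigned long bitmasks
rather than the kernel's cpumask API, and all names here are made up:

  /*
   * Illustration only: cpumask_setall() marks every CPU for a pending
   * TLB flush, while cpumask_or() against a housekeeping mask marks
   * only the housekeeping CPUs and leaves the others untouched.
   */
  #include <stdio.h>

  #define NR_CPUS 8UL

  int main(void)
  {
          unsigned long housekeeping_mask = 0x0fUL;       /* CPUs 0-3 */
          unsigned long pending_setall = 0;
          unsigned long pending_or = 0;

          /* old behaviour: cpumask_setall(&tlb_flush_pending) */
          pending_setall = (1UL << NR_CPUS) - 1;          /* 0xff: all CPUs */

          /* new behaviour: cpumask_or(&tlb_flush_pending, ..., mask) */
          pending_or |= housekeeping_mask;                /* 0x0f: CPUs 0-3 only */

          printf("setall: %#lx, or(housekeeping): %#lx\n",
                 pending_setall, pending_or);
          return 0;
  }

With this patch the housekeeping mask covers all possible CPUs, so the
result is the same as before; the narrowing only takes effect once a
smaller mask is installed.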
Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 380c7b05c36b..e402997aa1c2 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/cpumask.h>
#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
@@ -32,6 +33,7 @@ static unsigned long *asid_map;
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;
+static const struct cpumask *asid_housekeeping_mask;
static struct asid_bitmap pinned_asid;
@@ -129,17 +131,23 @@ static inline bool asid_gen_match(u64 asid)
return asid_match(asid, asid_read_generation());
}
+static const struct cpumask *flush_cpumask(void)
+{
+ return asid_housekeeping_mask;
+}
+
static void flush_context(void)
{
int i;
u64 asid;
+ const struct cpumask *cpumask = flush_cpumask();
flush_generation();
/* Update the list of reserved ASIDs and the ASID bitmap. */
set_reserved_asid_bits();
- for_each_possible_cpu(i) {
+ for_each_cpu(i, cpumask) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
* If this CPU has already been through a
@@ -158,7 +166,7 @@ static void flush_context(void)
* Queue a TLB invalidation for each CPU to perform on next
* context-switch
*/
- cpumask_setall(&tlb_flush_pending);
+ cpumask_or(&tlb_flush_pending, &tlb_flush_pending, cpumask);
}
static bool check_update_reserved_asid(u64 asid, u64 newasid)
@@ -439,6 +447,8 @@ static int asids_init(void)
pinned_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
pinned_asid.nr = 0;
+ asid_housekeeping_mask = cpu_possible_mask;
+
/*
* We cannot call set_reserved_asid_bits() here because CPU
* caps are not finalized yet, so it is safer to assume KPTI
--
2.27.0
[PATCH 2/5] arm64: mm: Extract the processing of asid_generation

To prepare for supporting the ASID isolation feature, extract the
handling of asid_generation into helper functions so that the
generation logic can be modified in one place. While at it, move the
generation bump, flush_generation(), into flush_context(), which is
clearer.
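
For illustration only (not part of this patch), a small user-space
sketch of the check that asid_match()/asid_gen_match() wrap; asid_bits
is fixed to 16 purely for the example. The generation lives in the bits
above asid_bits, so XOR-ing a context id with the current generation
and shifting out the low asid_bits yields zero exactly when the
generations match:

  /*
   * Illustration only: why !((asid ^ genid) >> asid_bits) detects a
   * stale generation. The low asid_bits hold the hardware ASID, the
   * high bits hold the rollover generation.
   */
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  static const unsigned int asid_bits = 16;       /* illustrative value */

  static bool asid_match(uint64_t asid, uint64_t genid)
  {
          return !((asid ^ genid) >> asid_bits);
  }

  int main(void)
  {
          uint64_t gen1 = 1ULL << asid_bits;      /* ASID_FIRST_VERSION */
          uint64_t gen2 = 2ULL << asid_bits;      /* after one rollover */
          uint64_t ctx  = gen1 | 0x2a;            /* asid2ctxid(0x2a, gen1) */

          printf("same generation: %d\n", asid_match(ctx, gen1)); /* 1 */
          printf("after rollover:  %d\n", asid_match(ctx, gen2)); /* 0 */
          return 0;
  }

This is also why new_context() re-reads the generation after
flush_context(): flush_generation() has bumped it by ASID_FIRST_VERSION
in the meantime.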
Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 39 ++++++++++++++++++++++++++++++++-------
1 file changed, 32 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8549b5f30352..380c7b05c36b 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -102,14 +102,40 @@ static void set_reserved_asid_bits(void)
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
}
-#define asid_gen_match(asid) \
- (!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits))
+static void asid_generation_init(void)
+{
+ atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+}
+
+static void flush_generation(void)
+{
+ /* We're out of ASIDs, so increment the global generation count */
+ atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+ &asid_generation);
+}
+
+static inline u64 asid_read_generation(void)
+{
+ return atomic64_read(&asid_generation);
+}
+
+static inline bool asid_match(u64 asid, u64 genid)
+{
+ return (!(((asid) ^ (genid)) >> asid_bits));
+}
+
+static inline bool asid_gen_match(u64 asid)
+{
+ return asid_match(asid, asid_read_generation());
+}
static void flush_context(void)
{
int i;
u64 asid;
+ flush_generation();
+
/* Update the list of reserved ASIDs and the ASID bitmap. */
set_reserved_asid_bits();
@@ -163,7 +189,7 @@ static u64 new_context(struct mm_struct *mm)
{
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
- u64 generation = atomic64_read(&asid_generation);
+ u64 generation = asid_read_generation();
if (asid != 0) {
u64 newasid = asid2ctxid(ctxid2asid(asid), generation);
@@ -202,14 +228,12 @@ static u64 new_context(struct mm_struct *mm)
if (asid != NUM_USER_ASIDS)
goto set_asid;
- /* We're out of ASIDs, so increment the global generation count */
- generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
- &asid_generation);
flush_context();
/* We have more ASIDs than CPUs, so this will always succeed */
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+ generation = asid_read_generation();
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
@@ -405,7 +429,8 @@ arch_initcall(asids_update_limit);
static int asids_init(void)
{
asid_bits = get_cpu_asid_bits();
- atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+ asid_generation_init();
+
asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
if (!asid_map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
--
2.27.0