2022-10-17 08:36:05

by Yunfeng Ye

Subject: [PATCH 0/5] Support ASID Isolation mechanism

From: y00318929 <[email protected]>

This patch series introduces an ASID isolation mechanism to improve
isolation.

Patches 1-3: Prepare for supporting the ASID isolation mechanism.

Patch 4: Detailed description and implementation of the ASID isolation
mechanism.

Patch 5: Add a TLB flush tracepoint to observe TLB flush activity on
context switch.

Yunfeng Ye (5):
arm64: mm: Define asid_bitmap structure for pinned_asid
arm64: mm: Extract the processing of asid_generation
arm64: mm: Use cpumask in flush_context()
arm64: mm: Support ASID isolation feature
arm64: mm: Add TLB flush trace on context switch

arch/arm64/mm/context.c | 286 ++++++++++++++++++++++++++++++++++------
1 file changed, 247 insertions(+), 39 deletions(-)

--
2.27.0


2022-10-17 08:36:15

by Yunfeng Ye

Subject: [PATCH 1/5] arm64: mm: Define asid_bitmap structure for pinned_asid

It is clearer to use the asid_bitmap structure for pinned_asid, and we
will reuse it for the isolated ASID bitmap later.

No functional change.
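
As an aside, here is a minimal stand-alone sketch of the bookkeeping this
structure groups together (user-space C for illustration only; example_pin(),
EXAMPLE_NR_ASIDS and the limit of 4 are made-up assumptions, not kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define EXAMPLE_NR_ASIDS 64UL        /* arbitrary example size */

struct asid_bitmap {
        unsigned long *map;        /* one bit per ASID */
        unsigned long nr;          /* number of ASIDs currently pinned */
        unsigned long max;         /* pinning limit */
};

/* Pin one ASID; fails once the limit is reached. */
static bool example_pin(struct asid_bitmap *b, unsigned long asid)
{
        if (b->nr >= b->max)
                return false;
        b->map[asid / (8 * sizeof(long))] |= 1UL << (asid % (8 * sizeof(long)));
        b->nr++;
        return true;
}

int main(void)
{
        struct asid_bitmap pinned = { .nr = 0, .max = 4 };

        pinned.map = calloc(EXAMPLE_NR_ASIDS / (8 * sizeof(long)), sizeof(long));
        for (unsigned long asid = 1; asid <= 6; asid++)
                printf("pin %lu: %s\n", asid,
                       example_pin(&pinned, asid) ? "ok" : "limit reached");
        free(pinned.map);
        return 0;
}

Grouping map/nr/max into one type lets the later isolated-ASID bitmap reuse
the same structure instead of adding three more globals.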

Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 38 +++++++++++++++++++++-----------------
1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e1e0dca01839..8549b5f30352 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,6 +17,12 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>

+struct asid_bitmap {
+ unsigned long *map;
+ unsigned long nr;
+ unsigned long max;
+};
+
static u32 asid_bits;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);

@@ -27,9 +33,7 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;

-static unsigned long max_pinned_asids;
-static unsigned long nr_pinned_asids;
-static unsigned long *pinned_asid_map;
+static struct asid_bitmap pinned_asid;

#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
@@ -90,8 +94,8 @@ static void set_kpti_asid_bits(unsigned long *map)

static void set_reserved_asid_bits(void)
{
- if (pinned_asid_map)
- bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS);
+ if (pinned_asid.map)
+ bitmap_copy(asid_map, pinned_asid.map, NUM_USER_ASIDS);
else if (arm64_kernel_unmapped_at_el0())
set_kpti_asid_bits(asid_map);
else
@@ -275,7 +279,7 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
unsigned long flags;
u64 asid;

- if (!pinned_asid_map)
+ if (!pinned_asid.map)
return 0;

raw_spin_lock_irqsave(&cpu_asid_lock, flags);
@@ -285,7 +289,7 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
if (refcount_inc_not_zero(&mm->context.pinned))
goto out_unlock;

- if (nr_pinned_asids >= max_pinned_asids) {
+ if (pinned_asid.nr >= pinned_asid.max) {
asid = 0;
goto out_unlock;
}
@@ -299,8 +303,8 @@ unsigned long arm64_mm_context_get(struct mm_struct *mm)
atomic64_set(&mm->context.id, asid);
}

- nr_pinned_asids++;
- __set_bit(ctxid2asid(asid), pinned_asid_map);
+ pinned_asid.nr++;
+ __set_bit(ctxid2asid(asid), pinned_asid.map);
refcount_set(&mm->context.pinned, 1);

out_unlock:
@@ -321,14 +325,14 @@ void arm64_mm_context_put(struct mm_struct *mm)
unsigned long flags;
u64 asid = atomic64_read(&mm->context.id);

- if (!pinned_asid_map)
+ if (!pinned_asid.map)
return;

raw_spin_lock_irqsave(&cpu_asid_lock, flags);

if (refcount_dec_and_test(&mm->context.pinned)) {
- __clear_bit(ctxid2asid(asid), pinned_asid_map);
- nr_pinned_asids--;
+ __clear_bit(ctxid2asid(asid), pinned_asid.map);
+ pinned_asid.nr--;
}

raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
@@ -377,8 +381,8 @@ static int asids_update_limit(void)

if (arm64_kernel_unmapped_at_el0()) {
num_available_asids /= 2;
- if (pinned_asid_map)
- set_kpti_asid_bits(pinned_asid_map);
+ if (pinned_asid.map)
+ set_kpti_asid_bits(pinned_asid.map);
}
/*
* Expect allocation after rollover to fail if we don't have at least
@@ -393,7 +397,7 @@ static int asids_update_limit(void)
* even if all CPUs have a reserved ASID and the maximum number of ASIDs
* are pinned, there still is at least one empty slot in the ASID map.
*/
- max_pinned_asids = num_available_asids - num_possible_cpus() - 2;
+ pinned_asid.max = num_available_asids - num_possible_cpus() - 2;
return 0;
}
arch_initcall(asids_update_limit);
@@ -407,8 +411,8 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);

- pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
- nr_pinned_asids = 0;
+ pinned_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
+ pinned_asid.nr = 0;

/*
* We cannot call set_reserved_asid_bits() here because CPU
--
2.27.0

2022-10-17 08:47:56

by Yunfeng Ye

Subject: [PATCH 5/5] arm64: mm: Add TLB flush trace on context switch

We do not know how many times the TLB is flushed on context switch.
Add a trace_tlb_flush() call in check_and_switch_context() so that this
can be observed through the tlb_flush trace event.

Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 0ea3e7485ae7..eab470a97620 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -19,6 +19,8 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>

+#include <trace/events/tlb.h>
+
struct asid_bitmap {
unsigned long *map;
unsigned long nr;
@@ -60,6 +62,8 @@ static DEFINE_STATIC_KEY_FALSE(asid_isolation_enable);
#define ctxid2asid(asid) ((asid) & ~ASID_MASK)
#define asid2ctxid(asid, genid) ((asid) | (genid))

+#define TLB_FLUSH_ALL (-1)
+
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
{
@@ -416,8 +420,10 @@ void check_and_switch_context(struct mm_struct *mm)
atomic64_set(&mm->context.id, asid);
}

- if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
local_flush_tlb_all();
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ }

atomic64_set(this_cpu_ptr(&active_asids), asid);
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
--
2.27.0

2022-10-17 09:13:53

by Yunfeng Ye

Subject: [PATCH 3/5] arm64: mm: Use cpumask in flush_context()

Currently, all CPUs are selected for TLB flush in flush_context().
To prepare for flushing the TLB on only a subset of CPUs, introduce
asid_housekeeping_mask and use cpumask_or() instead of cpumask_setall().
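
For illustration, a user-space sketch of the behavioural difference (plain
bitmasks stand in for struct cpumask; the 8-CPU layout and the housekeeping
mask value are assumptions made up for the example):

#include <stdio.h>

#define EXAMPLE_NR_CPUS 8

int main(void)
{
        unsigned long tlb_flush_pending = 0;
        unsigned long housekeeping_mask = 0x0f;        /* CPUs 0-3 */

        /* Old behaviour: cpumask_setall(&tlb_flush_pending) */
        unsigned long all = (1UL << EXAMPLE_NR_CPUS) - 1;

        /* New behaviour: cpumask_or(&tlb_flush_pending, ..., housekeeping) */
        tlb_flush_pending |= housekeeping_mask;

        printf("setall would mark: %#lx\n", all);
        printf("cpumask_or marks:  %#lx\n", tlb_flush_pending);
        return 0;
}

Since asid_housekeeping_mask is initialized to cpu_possible_mask in
asids_init() below, the behaviour is unchanged for now.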

Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 380c7b05c36b..e402997aa1c2 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
@@ -32,6 +33,7 @@ static unsigned long *asid_map;
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;
+static const struct cpumask *asid_housekeeping_mask;

static struct asid_bitmap pinned_asid;

@@ -129,17 +131,23 @@ static inline bool asid_gen_match(u64 asid)
return asid_match(asid, asid_read_generation());
}

+static const struct cpumask *flush_cpumask(void)
+{
+ return asid_housekeeping_mask;
+}
+
static void flush_context(void)
{
int i;
u64 asid;
+ const struct cpumask *cpumask = flush_cpumask();

flush_generation();

/* Update the list of reserved ASIDs and the ASID bitmap. */
set_reserved_asid_bits();

- for_each_possible_cpu(i) {
+ for_each_cpu(i, cpumask) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
* If this CPU has already been through a
@@ -158,7 +166,7 @@ static void flush_context(void)
* Queue a TLB invalidation for each CPU to perform on next
* context-switch
*/
- cpumask_setall(&tlb_flush_pending);
+ cpumask_or(&tlb_flush_pending, &tlb_flush_pending, cpumask);
}

static bool check_update_reserved_asid(u64 asid, u64 newasid)
@@ -439,6 +447,8 @@ static int asids_init(void)
pinned_asid.map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
pinned_asid.nr = 0;

+ asid_housekeeping_mask = cpu_possible_mask;
+
/*
* We cannot call set_reserved_asid_bits() here because CPU
* caps are not finalized yet, so it is safer to assume KPTI
--
2.27.0

2022-10-17 09:15:14

by Yunfeng Ye

Subject: [PATCH 2/5] arm64: mm: Extract the processing of asid_generation

To prepare for supporting the ASID isolation feature, extract the
handling of asid_generation into helper functions so that it can be
modified in one place.

It is also clearer to move the generation bump, flush_generation(),
into flush_context().
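
For reference, a stand-alone sketch of the generation check these helpers
wrap: a context id packs the hardware ASID in the low asid_bits and the
generation in the bits above, so the match only compares the high bits
(user-space C; the 16-bit ASID width and the example_* names are assumptions
for illustration, not kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_ASID_BITS 16
#define EXAMPLE_FIRST_VERSION (1UL << EXAMPLE_ASID_BITS)

static bool example_asid_match(uint64_t ctxid, uint64_t genid)
{
        return !((ctxid ^ genid) >> EXAMPLE_ASID_BITS);
}

int main(void)
{
        uint64_t generation = EXAMPLE_FIRST_VERSION;
        uint64_t ctxid = generation | 0x2a;        /* ASID 42, generation 1 */

        printf("match before rollover: %d\n", example_asid_match(ctxid, generation));
        generation += EXAMPLE_FIRST_VERSION;        /* what flush_generation() does */
        printf("match after rollover:  %d\n", example_asid_match(ctxid, generation));
        return 0;
}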

Signed-off-by: Yunfeng Ye <[email protected]>
---
arch/arm64/mm/context.c | 39 ++++++++++++++++++++++++++++++++-------
1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8549b5f30352..380c7b05c36b 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -102,14 +102,40 @@ static void set_reserved_asid_bits(void)
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
}

-#define asid_gen_match(asid) \
- (!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits))
+static void asid_generation_init(void)
+{
+ atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+}
+
+static void flush_generation(void)
+{
+ /* We're out of ASIDs, so increment the global generation count */
+ atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+ &asid_generation);
+}
+
+static inline u64 asid_read_generation(void)
+{
+ return atomic64_read(&asid_generation);
+}
+
+static inline bool asid_match(u64 asid, u64 genid)
+{
+ return (!(((asid) ^ (genid)) >> asid_bits));
+}
+
+static inline bool asid_gen_match(u64 asid)
+{
+ return asid_match(asid, asid_read_generation());
+}

static void flush_context(void)
{
int i;
u64 asid;

+ flush_generation();
+
/* Update the list of reserved ASIDs and the ASID bitmap. */
set_reserved_asid_bits();

@@ -163,7 +189,7 @@ static u64 new_context(struct mm_struct *mm)
{
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
- u64 generation = atomic64_read(&asid_generation);
+ u64 generation = asid_read_generation();

if (asid != 0) {
u64 newasid = asid2ctxid(ctxid2asid(asid), generation);
@@ -202,14 +228,12 @@ static u64 new_context(struct mm_struct *mm)
if (asid != NUM_USER_ASIDS)
goto set_asid;

- /* We're out of ASIDs, so increment the global generation count */
- generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
- &asid_generation);
flush_context();

/* We have more ASIDs than CPUs, so this will always succeed */
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);

+ generation = asid_read_generation();
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
@@ -405,7 +429,8 @@ arch_initcall(asids_update_limit);
static int asids_init(void)
{
asid_bits = get_cpu_asid_bits();
- atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+ asid_generation_init();
+
asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
if (!asid_map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
--
2.27.0