This patch series is moving out the ASID allocator in a separate file in order
to re-use it for the VMID. The benefits are:
- CPUs are not forced to exit a roll-over.
- Context invalidation is now per-CPU rather than
broadcasted.
There are no performance regression on the fastpath for ASID allocation.
Actually on the hackbench measurement (300 hackbench) it was .7% faster.
The measurement was made on a Seattle based SoC (8 CPUs), with the
number of VMID limited to 4-bit. The test involves running concurrently 40
guests with 2 vCPUs. Each guest will then execute hackbench 5 times
before exiting.
The performance difference between the current algo and the new one are:
- 2.5% less exit from the guest
- 22.4% more flush, although they are now local rather than
broadcasted
- 0.11% faster (just for the record)
The ASID allocator rework to make it generic has been divided in multiple
patches to make the review easier.
A branch with the patch based on 5.1-rc1 can be found:
http://xenbits.xen.org/gitweb/?p=people/julieng/linux-arm.git;a=shortlog;h=refs/heads/vmid-rework/rfc
Cheers,
Julien Grall (14):
arm64/mm: Introduce asid_info structure and move
asid_generation/asid_map to it
arm64/mm: Move active_asids and reserved_asids to asid_info
arm64/mm: Move bits to asid_info
arm64/mm: Move the variable lock and tlb_flush_pending to asid_info
arm64/mm: Remove dependency on MM in new_context
arm64/mm: Store the number of asid allocated per context
arm64/mm: Introduce NUM_ASIDS
arm64/mm: Split asid_inits in 2 parts
arm64/mm: Split the function check_and_switch_context in 3 parts
arm64/mm: Introduce a callback to flush the local context
arm64: Move the ASID allocator code in a separate file
arm64/lib: asid: Allow user to update the context under the lock
arm/kvm: Introduce a new VMID allocator
kvm/arm: Align the VMID allocation with the arm64 ASID one
arch/arm/include/asm/kvm_asid.h | 81 +++++++++++++++
arch/arm/include/asm/kvm_asm.h | 2 +-
arch/arm/include/asm/kvm_host.h | 5 +-
arch/arm/include/asm/kvm_hyp.h | 1 +
arch/arm/kvm/Makefile | 1 +
arch/arm/kvm/asid.c | 191 +++++++++++++++++++++++++++++++++++
arch/arm/kvm/hyp/tlb.c | 8 +-
arch/arm64/include/asm/asid.h | 81 +++++++++++++++
arch/arm64/include/asm/kvm_asid.h | 8 ++
arch/arm64/include/asm/kvm_asm.h | 2 +-
arch/arm64/include/asm/kvm_host.h | 5 +-
arch/arm64/kvm/hyp/tlb.c | 10 +-
arch/arm64/lib/Makefile | 2 +
arch/arm64/lib/asid.c | 191 +++++++++++++++++++++++++++++++++++
arch/arm64/mm/context.c | 205 ++++++--------------------------------
virt/kvm/arm/arm.c | 112 +++++++--------------
16 files changed, 638 insertions(+), 267 deletions(-)
create mode 100644 arch/arm/include/asm/kvm_asid.h
create mode 100644 arch/arm/kvm/asid.c
create mode 100644 arch/arm64/include/asm/asid.h
create mode 100644 arch/arm64/include/asm/kvm_asid.h
create mode 100644 arch/arm64/lib/asid.c
--
2.11.0
In an attempt to make the ASID allocator generic, create a new structure
asid_info to store all the information necessary for the allocator.
For now, move the variables asid_generation and asid_map to the new structure
asid_info. Follow-up patches will move more variables.
Note to avoid more renaming aftwards, a local variable 'info' has been
created and is a pointer to the ASID allocator structure.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 46 ++++++++++++++++++++++++++--------------------
1 file changed, 26 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1f0ea2facf24..34db54f1a39a 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -30,8 +30,11 @@
static u32 asid_bits;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-static atomic64_t asid_generation;
-static unsigned long *asid_map;
+struct asid_info
+{
+ atomic64_t generation;
+ unsigned long *map;
+} asid_info;
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
@@ -88,13 +91,13 @@ void verify_cpu_asid_bits(void)
}
}
-static void flush_context(void)
+static void flush_context(struct asid_info *info)
{
int i;
u64 asid;
/* Update the list of reserved ASIDs and the ASID bitmap. */
- bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+ bitmap_clear(info->map, 0, NUM_USER_ASIDS);
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
@@ -107,7 +110,7 @@ static void flush_context(void)
*/
if (asid == 0)
asid = per_cpu(reserved_asids, i);
- __set_bit(asid2idx(asid), asid_map);
+ __set_bit(asid2idx(asid), info->map);
per_cpu(reserved_asids, i) = asid;
}
@@ -142,11 +145,11 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid)
return hit;
}
-static u64 new_context(struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, struct mm_struct *mm)
{
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
- u64 generation = atomic64_read(&asid_generation);
+ u64 generation = atomic64_read(&info->generation);
if (asid != 0) {
u64 newasid = generation | (asid & ~ASID_MASK);
@@ -162,7 +165,7 @@ static u64 new_context(struct mm_struct *mm)
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
- if (!__test_and_set_bit(asid2idx(asid), asid_map))
+ if (!__test_and_set_bit(asid2idx(asid), info->map))
return newasid;
}
@@ -173,20 +176,20 @@ static u64 new_context(struct mm_struct *mm)
* a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
* pairs.
*/
- asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+ asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
goto set_asid;
/* We're out of ASIDs, so increment the global generation count */
generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
- &asid_generation);
- flush_context();
+ &info->generation);
+ flush_context(info);
/* We have more ASIDs than CPUs, so this will always succeed */
- asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+ asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, 1);
set_asid:
- __set_bit(asid, asid_map);
+ __set_bit(asid, info->map);
cur_idx = asid;
return idx2asid(asid) | generation;
}
@@ -195,6 +198,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
unsigned long flags;
u64 asid, old_active_asid;
+ struct asid_info *info = &asid_info;
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
@@ -217,7 +221,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
*/
old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
if (old_active_asid &&
- !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+ !((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
old_active_asid, asid))
goto switch_mm_fastpath;
@@ -225,8 +229,8 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
- if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
- asid = new_context(mm);
+ if ((asid ^ atomic64_read(&info->generation)) >> asid_bits) {
+ asid = new_context(info, mm);
atomic64_set(&mm->context.id, asid);
}
@@ -259,16 +263,18 @@ asmlinkage void post_ttbr_update_workaround(void)
static int asids_init(void)
{
+ struct asid_info *info = &asid_info;
+
asid_bits = get_cpu_asid_bits();
/*
* Expect allocation after rollover to fail if we don't have at least
* one more ASID than CPUs. ASID #0 is reserved for init_mm.
*/
WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
- atomic64_set(&asid_generation, ASID_FIRST_VERSION);
- asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map),
- GFP_KERNEL);
- if (!asid_map)
+ atomic64_set(&info->generation, ASID_FIRST_VERSION);
+ info->map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*info->map),
+ GFP_KERNEL);
+ if (!info->map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
--
2.11.0
At the moment ASID_FIRST_VERSION is used to know the number of ASIDs
supported. As we are going to move the ASID allocator in a separate, it
would be better to use a different name for external users.
This patch adds NUM_ASIDS and implements ASID_FIRST_VERSION using it.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 5a4c2b1aac71..fb13bc249951 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -48,7 +48,9 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
-#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
+#define NUM_ASIDS(info) (1UL << ((info)->bits))
+
+#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define ASID_PER_CONTEXT 2
@@ -56,7 +58,7 @@ static DEFINE_PER_CPU(u64, reserved_asids);
#define ASID_PER_CONTEXT 1
#endif
-#define NUM_CTXT_ASIDS(info) (ASID_FIRST_VERSION(info) >> (info)->ctxt_shift)
+#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
--
2.11.0
The function check_and_switch_context is used to:
1) Check whether the ASID is still valid
2) Generate a new one if it is not valid
3) Switch the context
While the latter is specific to the MM subsystem, the rest could be part
of the generic ASID allocator.
After this patch, the function is now split in 3 parts which corresponds
to the use of the functions:
1) asid_check_context: Check if the ASID is still valid
2) asid_new_context: Generate a new ASID for the context
3) check_and_switch_context: Call 1) and 2) and switch the context
1) and 2) have not been merged in a single function because we want to
avoid to add a branch in when the ASID is still valid. This will matter
when the code will be moved in separate file later on as 1) will reside
in the header as a static inline function.
Signed-off-by: Julien Grall <[email protected]>
---
Will wants to avoid to add a branch when the ASID is still valid. So
1) and 2) are in separates function. The former will move to a new
header and make static inline.
---
arch/arm64/mm/context.c | 51 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b071a1b3469e..cbf1c24cb3ee 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -204,16 +204,21 @@ static u64 new_context(struct asid_info *info, atomic64_t *pasid)
return idx2asid(info, asid) | generation;
}
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu)
{
- unsigned long flags;
u64 asid, old_active_asid;
- struct asid_info *info = &asid_info;
- if (system_supports_cnp())
- cpu_set_reserved_ttbr0();
-
- asid = atomic64_read(&mm->context.id);
+ asid = atomic64_read(pasid);
/*
* The memory ordering here is subtle.
@@ -234,14 +239,30 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
!((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
old_active_asid, asid))
- goto switch_mm_fastpath;
+ return;
+
+ asid_new_context(info, pasid, cpu);
+}
+
+/*
+ * Generate a new ASID for the context.
+ *
+ * @pasid: Pointer to the current ASID batch allocated. It will be updated
+ * with the new ASID batch.
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu)
+{
+ unsigned long flags;
+ u64 asid;
raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
- asid = atomic64_read(&mm->context.id);
+ asid = atomic64_read(pasid);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
- asid = new_context(info, &mm->context.id);
- atomic64_set(&mm->context.id, asid);
+ asid = new_context(info, pasid);
+ atomic64_set(pasid, asid);
}
if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
@@ -249,8 +270,14 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+{
+ if (system_supports_cnp())
+ cpu_set_reserved_ttbr0();
-switch_mm_fastpath:
+ asid_check_context(&asid_info, &mm->context.id, cpu);
arm64_apply_bp_hardening();
--
2.11.0
Some users of the ASID allocator (e.g VMID) will require to update the
context when a new ASID is generated. This has to be protected by a lock
to prevent concurrent modification.
Rather than introducing yet another lock, it is possible to re-use the
allocator lock for that purpose. This patch introduces a new callback
that will be call when updating the context.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/include/asm/asid.h | 12 ++++++++----
arch/arm64/lib/asid.c | 10 ++++++++--
arch/arm64/mm/context.c | 11 ++++++++---
3 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
index bb62b587f37f..d8d9dc875bec 100644
--- a/arch/arm64/include/asm/asid.h
+++ b/arch/arm64/include/asm/asid.h
@@ -23,6 +23,8 @@ struct asid_info
unsigned int ctxt_shift;
/* Callback to locally flush the context. */
void (*flush_cpu_ctxt_cb)(void);
+ /* Callback to call when a context is updated */
+ void (*update_ctxt_cb)(void *ctxt);
};
#define NUM_ASIDS(info) (1UL << ((info)->bits))
@@ -31,7 +33,7 @@ struct asid_info
#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu);
+ unsigned int cpu, void *ctxt);
/*
* Check the ASID is still valid for the context. If not generate a new ASID.
@@ -40,7 +42,8 @@ void asid_new_context(struct asid_info *info, atomic64_t *pasid,
* @cpu: current CPU ID. Must have been acquired throught get_cpu()
*/
static inline void asid_check_context(struct asid_info *info,
- atomic64_t *pasid, unsigned int cpu)
+ atomic64_t *pasid, unsigned int cpu,
+ void *ctxt)
{
u64 asid, old_active_asid;
@@ -67,11 +70,12 @@ static inline void asid_check_context(struct asid_info *info,
old_active_asid, asid))
return;
- asid_new_context(info, pasid, cpu);
+ asid_new_context(info, pasid, cpu, ctxt);
}
int asid_allocator_init(struct asid_info *info,
u32 bits, unsigned int asid_per_ctxt,
- void (*flush_cpu_ctxt_cb)(void));
+ void (*flush_cpu_ctxt_cb)(void),
+ void (*update_ctxt_cb)(void *ctxt));
#endif
diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
index 72b71bfb32be..b47e6769c1bc 100644
--- a/arch/arm64/lib/asid.c
+++ b/arch/arm64/lib/asid.c
@@ -130,9 +130,10 @@ static u64 new_context(struct asid_info *info, atomic64_t *pasid)
* @pasid: Pointer to the current ASID batch allocated. It will be updated
* with the new ASID batch.
* @cpu: current CPU ID. Must have been acquired through get_cpu()
+ * @ctxt: Context to update when calling update_context
*/
void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu)
+ unsigned int cpu, void *ctxt)
{
unsigned long flags;
u64 asid;
@@ -149,6 +150,9 @@ void asid_new_context(struct asid_info *info, atomic64_t *pasid,
info->flush_cpu_ctxt_cb();
atomic64_set(&active_asid(info, cpu), asid);
+
+ info->update_ctxt_cb(ctxt);
+
raw_spin_unlock_irqrestore(&info->lock, flags);
}
@@ -163,11 +167,13 @@ void asid_new_context(struct asid_info *info, atomic64_t *pasid,
*/
int asid_allocator_init(struct asid_info *info,
u32 bits, unsigned int asid_per_ctxt,
- void (*flush_cpu_ctxt_cb)(void))
+ void (*flush_cpu_ctxt_cb)(void),
+ void (*update_ctxt_cb)(void *ctxt))
{
info->bits = bits;
info->ctxt_shift = ilog2(asid_per_ctxt);
info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
+ info->update_ctxt_cb = update_ctxt_cb;
/*
* Expect allocation after rollover to fail if we don't have at least
* one more ASID than CPUs. ASID #0 is always reserved.
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 95ee7711a2ef..737b4bd7bbe7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -82,7 +82,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
- asid_check_context(&asid_info, &mm->context.id, cpu);
+ asid_check_context(&asid_info, &mm->context.id, cpu, mm);
arm64_apply_bp_hardening();
@@ -108,12 +108,17 @@ static void asid_flush_cpu_ctxt(void)
local_flush_tlb_all();
}
+static void asid_update_ctxt(void *ctxt)
+{
+ /* Nothing to do */
+}
+
static int asids_init(void)
{
u32 bits = get_cpu_asid_bits();
- if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
- asid_flush_cpu_ctxt))
+ if (asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
+ asid_flush_cpu_ctxt, asid_update_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
NUM_ASIDS(&asid_info));
--
2.11.0
At the moment, the VMID algorithm will send an SGI to all the CPUs to
force an exit and then broadcast a full TLB flush and I-Cache
invalidation.
This patch re-use the new ASID allocator. The
benefits are:
- CPUs are not forced to exit at roll-over. Instead the VMID will be
marked reserved and the context will be flushed at next exit. This
will reduce the IPIs traffic.
- Context invalidation is now per-CPU rather than broadcasted.
With the new algo, the code is now adapted:
- The function __kvm_flush_vm_context() has been renamed to
__kvm_flush_cpu_vmid_context and now only flushing the current CPU context.
- The call to update_vttbr() will be done with preemption disabled
as the new algo requires to store information per-CPU.
- The TLBs associated to EL1 will be flushed when booting a CPU to
deal with stale information. This was previously done on the
allocation of the first VMID of a new generation.
The measurement was made on a Seattle based SoC (8 CPUs), with the
number of VMID limited to 4-bit. The test involves running concurrently 40
guests with 2 vCPUs. Each guest will then execute hackbench 5 times
before exiting.
The performance difference between the current algo and the new one are:
- 2.5% less exit from the guest
- 22.4% more flush, although they are now local rather than
broadcasted
- 0.11% faster (just for the record)
Signed-off-by: Julien Grall <[email protected]>
----
Looking at the __kvm_flush_vm_context, it might be possible to
reduce more the overhead by removing the I-Cache flush for other
cache than VIPT. This has been left aside for now.
---
arch/arm/include/asm/kvm_asm.h | 2 +-
arch/arm/include/asm/kvm_host.h | 5 +-
arch/arm/include/asm/kvm_hyp.h | 1 +
arch/arm/kvm/hyp/tlb.c | 8 +--
arch/arm64/include/asm/kvm_asid.h | 8 +++
arch/arm64/include/asm/kvm_asm.h | 2 +-
arch/arm64/include/asm/kvm_host.h | 5 +-
arch/arm64/kvm/hyp/tlb.c | 10 ++--
virt/kvm/arm/arm.c | 112 +++++++++++++-------------------------
9 files changed, 61 insertions(+), 92 deletions(-)
create mode 100644 arch/arm64/include/asm/kvm_asid.h
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 35491af87985..ce60a4a46fcc 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -65,7 +65,7 @@ struct kvm_vcpu;
extern char __kvm_hyp_init[];
extern char __kvm_hyp_init_end[];
-extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_vmid_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 770d73257ad9..e2c3a4a7b020 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -59,8 +59,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
struct kvm_vmid {
- /* The VMID generation used for the virt. memory system */
- u64 vmid_gen;
+ /* The ASID used for the ASID allocator */
+ atomic64_t asid;
u32 vmid;
};
@@ -264,7 +264,6 @@ unsigned long __kvm_call_hyp(void *hypfn, ...);
ret; \
})
-void force_vm_exit(const cpumask_t *mask);
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 87bcd18df8d5..c3d1011ca1bf 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -75,6 +75,7 @@
#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0)
#define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4)
+#define TLBIALLNSNH __ACCESS_CP15(c8, 4, c7, 4)
#define PRRR __ACCESS_CP15(c10, 0, c2, 0)
#define NMRR __ACCESS_CP15(c10, 0, c2, 1)
#define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 8e4afba73635..42b9ab47fc94 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -71,9 +71,9 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
write_sysreg(0, VTTBR);
}
-void __hyp_text __kvm_flush_vm_context(void)
+void __hyp_text __kvm_flush_cpu_vmid_context(void)
{
- write_sysreg(0, TLBIALLNSNHIS);
- write_sysreg(0, ICIALLUIS);
- dsb(ish);
+ write_sysreg(0, TLBIALLNSNH);
+ write_sysreg(0, ICIALLU);
+ dsb(nsh);
}
diff --git a/arch/arm64/include/asm/kvm_asid.h b/arch/arm64/include/asm/kvm_asid.h
new file mode 100644
index 000000000000..8b586e43c094
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asid.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_ASID_H__
+#define __ARM64_KVM_ASID_H__
+
+#include <asm/asid.h>
+
+#endif /* __ARM64_KVM_ASID_H__ */
+
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f5b79e995f40..8d7d01ee1d03 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -62,7 +62,7 @@ extern char __kvm_hyp_init_end[];
extern char __kvm_hyp_vector[];
-extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_vmid_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a01fe087e022..c64c9ac031df 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -60,8 +60,8 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
struct kvm_vmid {
- /* The VMID generation used for the virt. memory system */
- u64 vmid_gen;
+ /* The ASID used for the ASID allocator */
+ atomic64_t asid;
u32 vmid;
};
@@ -417,7 +417,6 @@ u64 __kvm_call_hyp(void *hypfn, ...);
ret; \
})
-void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 76c30866069e..e80e922988c1 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -200,10 +200,10 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
__tlb_switch_to_host()(kvm, &cxt);
}
-void __hyp_text __kvm_flush_vm_context(void)
+void __hyp_text __kvm_flush_cpu_vmid_context(void)
{
- dsb(ishst);
- __tlbi(alle1is);
- asm volatile("ic ialluis" : : );
- dsb(ish);
+ dsb(nshst);
+ __tlbi(alle1);
+ asm volatile("ic iallu" : : );
+ dsb(nsh);
}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 99c37384ba7b..03f95fffd672 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -46,6 +46,7 @@
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
+#include <asm/kvm_asid.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
@@ -62,10 +63,10 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
/* Per-CPU variable containing the currently running vcpu. */
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
-/* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u32 kvm_next_vmid;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
+static DEFINE_PER_CPU(atomic64_t, active_vmids);
+static DEFINE_PER_CPU(u64, reserved_vmids);
+
+struct asid_info vmid_info;
static bool vgic_present;
@@ -140,9 +141,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
- /* Mark the initial VMID generation invalid */
- kvm->arch.vmid.vmid_gen = 0;
-
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = vgic_present ?
kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
@@ -455,35 +453,17 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
return vcpu_mode_priv(vcpu);
}
-/* Just ensure a guest exit from a particular CPU */
-static void exit_vm_noop(void *info)
+static void vmid_flush_cpu_ctxt(void)
{
+ kvm_call_hyp(__kvm_flush_cpu_vmid_context);
}
-void force_vm_exit(const cpumask_t *mask)
+static void vmid_update_ctxt(void *ctxt)
{
- preempt_disable();
- smp_call_function_many(mask, exit_vm_noop, NULL, true);
- preempt_enable();
-}
+ struct kvm_vmid *vmid = ctxt;
+ u64 asid = atomic64_read(&vmid->asid);
-/**
- * need_new_vmid_gen - check that the VMID is still valid
- * @vmid: The VMID to check
- *
- * return true if there is a new generation of VMIDs being used
- *
- * The hardware supports a limited set of values with the value zero reserved
- * for the host, so we check if an assigned value belongs to a previous
- * generation, which which requires us to assign a new value. If we're the
- * first to use a VMID for the new generation, we must flush necessary caches
- * and TLBs on all CPUs.
- */
-static bool need_new_vmid_gen(struct kvm_vmid *vmid)
-{
- u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
- smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
- return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
+ vmid->vmid = asid & ((1ULL << kvm_get_vmid_bits()) - 1);
}
/**
@@ -493,48 +473,11 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid)
*/
static void update_vmid(struct kvm_vmid *vmid)
{
- if (!need_new_vmid_gen(vmid))
- return;
-
- spin_lock(&kvm_vmid_lock);
-
- /*
- * We need to re-check the vmid_gen here to ensure that if another vcpu
- * already allocated a valid vmid for this vm, then this vcpu should
- * use the same vmid.
- */
- if (!need_new_vmid_gen(vmid)) {
- spin_unlock(&kvm_vmid_lock);
- return;
- }
-
- /* First user of a new VMID generation? */
- if (unlikely(kvm_next_vmid == 0)) {
- atomic64_inc(&kvm_vmid_gen);
- kvm_next_vmid = 1;
-
- /*
- * On SMP we know no other CPUs can use this CPU's or each
- * other's VMID after force_vm_exit returns since the
- * kvm_vmid_lock blocks them from reentry to the guest.
- */
- force_vm_exit(cpu_all_mask);
- /*
- * Now broadcast TLB + ICACHE invalidation over the inner
- * shareable domain to make sure all data structures are
- * clean.
- */
- kvm_call_hyp(__kvm_flush_vm_context);
- }
+ int cpu = get_cpu();
- vmid->vmid = kvm_next_vmid;
- kvm_next_vmid++;
- kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
+ asid_check_context(&vmid_info, &vmid->asid, cpu, vmid);
- smp_wmb();
- WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
-
- spin_unlock(&kvm_vmid_lock);
+ put_cpu();
}
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
@@ -685,8 +628,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
cond_resched();
- update_vmid(&vcpu->kvm->arch.vmid);
-
check_vcpu_requests(vcpu);
/*
@@ -696,6 +637,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
preempt_disable();
+ /*
+ * The ASID/VMID allocator only tracks active VMIDs per
+ * physical CPU, and therefore the VMID allocated may not be
+ * preserved on VMID roll-over if the task was preempted,
+ * making a thread's VMID inactive. So we need to call
+ * update_vttbr in non-premptible context.
+ */
+ update_vmid(&vcpu->kvm->arch.vmid);
+
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
@@ -734,8 +684,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
- if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
- kvm_request_pending(vcpu)) {
+ if (ret <= 0 || kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
@@ -1305,6 +1254,8 @@ static void cpu_init_hyp_mode(void *dummy)
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
+
+ kvm_call_hyp(__kvm_flush_cpu_vmid_context);
}
static void cpu_hyp_reset(void)
@@ -1412,6 +1363,17 @@ static inline void hyp_cpu_pm_exit(void)
static int init_common_resources(void)
{
+ /*
+ * Initialize the ASID allocator telling it to allocate a single
+ * VMID per VM.
+ */
+ if (asid_allocator_init(&vmid_info, kvm_get_vmid_bits(), 1,
+ vmid_flush_cpu_ctxt, vmid_update_ctxt))
+ panic("Failed to initialize VMID allocator\n");
+
+ vmid_info.active = &active_vmids;
+ vmid_info.reserved = &reserved_vmids;
+
kvm_set_ipa_limit();
return 0;
--
2.11.0
A follow-up patch will replace the KVM VMID allocator with the arm64 ASID
allocator. It is not yet clear how the code can be shared between arm
and arm64, so this is a verbatim copy of arch/arm64/lib/asid.c.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm/include/asm/kvm_asid.h | 81 +++++++++++++++++
arch/arm/kvm/Makefile | 1 +
arch/arm/kvm/asid.c | 191 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 273 insertions(+)
create mode 100644 arch/arm/include/asm/kvm_asid.h
create mode 100644 arch/arm/kvm/asid.c
diff --git a/arch/arm/include/asm/kvm_asid.h b/arch/arm/include/asm/kvm_asid.h
new file mode 100644
index 000000000000..f312a6d7543c
--- /dev/null
+++ b/arch/arm/include/asm/kvm_asid.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM_KVM_ASID_H__
+#define __ARM_KVM_ASID_H__
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+
+struct asid_info
+{
+ atomic64_t generation;
+ unsigned long *map;
+ atomic64_t __percpu *active;
+ u64 __percpu *reserved;
+ u32 bits;
+ /* Lock protecting the structure */
+ raw_spinlock_t lock;
+ /* Which CPU requires context flush on next call */
+ cpumask_t flush_pending;
+ /* Number of ASID allocated by context (shift value) */
+ unsigned int ctxt_shift;
+ /* Callback to locally flush the context. */
+ void (*flush_cpu_ctxt_cb)(void);
+ /* Callback to call when a context is updated */
+ void (*update_ctxt_cb)(void *ctxt);
+};
+
+#define NUM_ASIDS(info) (1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu, void *ctxt);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu,
+ void *ctxt)
+{
+ u64 asid, old_active_asid;
+
+ asid = atomic64_read(pasid);
+
+ /*
+ * The memory ordering here is subtle.
+ * If our active_asid is non-zero and the ASID matches the current
+ * generation, then we update the active_asid entry with a relaxed
+ * cmpxchg. Racing with a concurrent rollover means that either:
+ *
+ * - We get a zero back from the cmpxchg and end up waiting on the
+ * lock. Taking the lock synchronises with the rollover and so
+ * we are forced to see the updated generation.
+ *
+ * - We get a valid ASID back from the cmpxchg, which means the
+ * relaxed xchg in flush_context will treat us as reserved
+ * because atomic RmWs are totally ordered for a given location.
+ */
+ old_active_asid = atomic64_read(&active_asid(info, cpu));
+ if (old_active_asid &&
+ !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+ atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+ old_active_asid, asid))
+ return;
+
+ asid_new_context(info, pasid, cpu, ctxt);
+}
+
+int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void),
+ void (*update_ctxt_cb)(void *ctxt));
+
+#endif /* __ARM_KVM_ASID_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 531e59f5be9c..35d2d4c67827 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
obj-y += kvm-arm.o init.o interrupts.o
obj-y += handle_exit.o guest.o emulate.o reset.o
+obj-y += asid.o
obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
diff --git a/arch/arm/kvm/asid.c b/arch/arm/kvm/asid.c
new file mode 100644
index 000000000000..60a25270163a
--- /dev/null
+++ b/arch/arm/kvm/asid.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic ASID allocator.
+ *
+ * Based on arch/arm/mm/context.c
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include <linux/slab.h>
+
+#include <asm/kvm_asid.h>
+
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
+#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
+
+#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
+#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+
+static void flush_context(struct asid_info *info)
+{
+ int i;
+ u64 asid;
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
+
+ for_each_possible_cpu(i) {
+ asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
+ /*
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
+ */
+ if (asid == 0)
+ asid = reserved_asid(info, i);
+ __set_bit(asid2idx(info, asid), info->map);
+ reserved_asid(info, i) = asid;
+ }
+
+ /*
+ * Queue a TLB invalidation for each CPU to perform on next
+ * context-switch
+ */
+ cpumask_setall(&info->flush_pending);
+}
+
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+ u64 newasid)
+{
+ int cpu;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved ASIDs looking for a match.
+ * If we find one, then we can update our mm to use newasid
+ * (i.e. the same ASID in the current generation) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old ASID are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved ASID in a future
+ * generation.
+ */
+ for_each_possible_cpu(cpu) {
+ if (reserved_asid(info, cpu) == asid) {
+ hit = true;
+ reserved_asid(info, cpu) = newasid;
+ }
+ }
+
+ return hit;
+}
+
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
+{
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(pasid);
+ u64 generation = atomic64_read(&info->generation);
+
+ if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK(info));
+
+ /*
+ * If our current ASID was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (check_update_reserved_asid(info, asid, newasid))
+ return newasid;
+
+ /*
+ * We had a valid ASID in a previous life, so try to re-use
+ * it if possible.
+ */
+ if (!__test_and_set_bit(asid2idx(info, asid), info->map))
+ return newasid;
+ }
+
+ /*
+ * Allocate a free ASID. If we can't find one, take a note of the
+ * currently active ASIDs and mark the TLBs as requiring flushes. We
+ * always count from ASID #2 (index 1), as we use ASID #0 when setting
+ * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+ * pairs.
+ */
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
+ if (asid != NUM_CTXT_ASIDS(info))
+ goto set_asid;
+
+ /* We're out of ASIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
+ &info->generation);
+ flush_context(info);
+
+ /* We have more ASIDs than CPUs, so this will always succeed */
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
+
+set_asid:
+ __set_bit(asid, info->map);
+ cur_idx = asid;
+ return idx2asid(info, asid) | generation;
+}
+
+/*
+ * Generate a new ASID for the context.
+ *
+ * @pasid: Pointer to the current ASID batch allocated. It will be updated
+ * with the new ASID batch.
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ * @ctxt: Context to update when calling update_context
+ */
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu, void *ctxt)
+{
+ unsigned long flags;
+ u64 asid;
+
+ raw_spin_lock_irqsave(&info->lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(pasid);
+ if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
+ asid = new_context(info, pasid);
+ atomic64_set(pasid, asid);
+ }
+
+ if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
+ info->flush_cpu_ctxt_cb();
+
+ atomic64_set(&active_asid(info, cpu), asid);
+
+ info->update_ctxt_cb(ctxt);
+
+ raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/*
+ * Initialize the ASID allocator
+ *
+ * @info: Pointer to the asid allocator structure
+ * @bits: Number of ASIDs available
+ * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
+ * allocated contiguously for a given context. This value should be a power of
+ * 2.
+ */
+int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void),
+ void (*update_ctxt_cb)(void *ctxt))
+{
+ info->bits = bits;
+ info->ctxt_shift = ilog2(asid_per_ctxt);
+ info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
+ info->update_ctxt_cb = update_ctxt_cb;
+ /*
+ * Expect allocation after rollover to fail if we don't have at least
+ * one more ASID than CPUs. ASID #0 is always reserved.
+ */
+ WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
+ atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
+ info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
+ sizeof(*info->map), GFP_KERNEL);
+ if (!info->map)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&info->lock);
+
+ return 0;
+}
--
2.11.0
We will want to re-use the ASID allocator in a separate context (e.g
allocating VMID). So move the code in a new file.
The function asid_check_context has been moved in the header as a static
inline function because we want to avoid add a branch when checking if the
ASID is still valid.
Signed-off-by: Julien Grall <[email protected]>
---
This code will be used in the virt code for allocating VMID. I am not
entirely sure where to place it. Lib could potentially be a good place but I
am not entirely convinced the algo as it is could be used by other
architecture.
Looking at x86, it seems that it will not be possible to re-use because
the number of PCID (aka ASID) could be smaller than the number of CPUs.
See commit message 10af6235e0d327d42e1bad974385197817923dc1 "x86/mm:
Implement PCID based optimization: try to preserve old TLB entries using
PCI".
---
arch/arm64/include/asm/asid.h | 77 ++++++++++++++
arch/arm64/lib/Makefile | 2 +
arch/arm64/lib/asid.c | 185 +++++++++++++++++++++++++++++++++
arch/arm64/mm/context.c | 235 +-----------------------------------------
4 files changed, 267 insertions(+), 232 deletions(-)
create mode 100644 arch/arm64/include/asm/asid.h
create mode 100644 arch/arm64/lib/asid.c
diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
new file mode 100644
index 000000000000..bb62b587f37f
--- /dev/null
+++ b/arch/arm64/include/asm/asid.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_ASID_H
+#define __ASM_ASM_ASID_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+
+struct asid_info
+{
+ atomic64_t generation;
+ unsigned long *map;
+ atomic64_t __percpu *active;
+ u64 __percpu *reserved;
+ u32 bits;
+ /* Lock protecting the structure */
+ raw_spinlock_t lock;
+ /* Which CPU requires context flush on next call */
+ cpumask_t flush_pending;
+ /* Number of ASID allocated by context (shift value) */
+ unsigned int ctxt_shift;
+ /* Callback to locally flush the context. */
+ void (*flush_cpu_ctxt_cb)(void);
+};
+
+#define NUM_ASIDS(info) (1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu)
+{
+ u64 asid, old_active_asid;
+
+ asid = atomic64_read(pasid);
+
+ /*
+ * The memory ordering here is subtle.
+ * If our active_asid is non-zero and the ASID matches the current
+ * generation, then we update the active_asid entry with a relaxed
+ * cmpxchg. Racing with a concurrent rollover means that either:
+ *
+ * - We get a zero back from the cmpxchg and end up waiting on the
+ * lock. Taking the lock synchronises with the rollover and so
+ * we are forced to see the updated generation.
+ *
+ * - We get a valid ASID back from the cmpxchg, which means the
+ * relaxed xchg in flush_context will treat us as reserved
+ * because atomic RmWs are totally ordered for a given location.
+ */
+ old_active_asid = atomic64_read(&active_asid(info, cpu));
+ if (old_active_asid &&
+ !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+ atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+ old_active_asid, asid))
+ return;
+
+ asid_new_context(info, pasid, cpu);
+}
+
+int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void));
+
+#endif
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 5540a1638baf..720df5ee2aa2 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o tishift.o
+lib-y += asid.o
+
ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
new file mode 100644
index 000000000000..72b71bfb32be
--- /dev/null
+++ b/arch/arm64/lib/asid.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic ASID allocator.
+ *
+ * Based on arch/arm/mm/context.c
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include <linux/slab.h>
+
+#include <asm/asid.h>
+
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
+#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
+
+#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
+#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+
+static void flush_context(struct asid_info *info)
+{
+ int i;
+ u64 asid;
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
+
+ for_each_possible_cpu(i) {
+ asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
+ /*
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
+ */
+ if (asid == 0)
+ asid = reserved_asid(info, i);
+ __set_bit(asid2idx(info, asid), info->map);
+ reserved_asid(info, i) = asid;
+ }
+
+ /*
+ * Queue a TLB invalidation for each CPU to perform on next
+ * context-switch
+ */
+ cpumask_setall(&info->flush_pending);
+}
+
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+ u64 newasid)
+{
+ int cpu;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved ASIDs looking for a match.
+ * If we find one, then we can update our mm to use newasid
+ * (i.e. the same ASID in the current generation) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old ASID are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved ASID in a future
+ * generation.
+ */
+ for_each_possible_cpu(cpu) {
+ if (reserved_asid(info, cpu) == asid) {
+ hit = true;
+ reserved_asid(info, cpu) = newasid;
+ }
+ }
+
+ return hit;
+}
+
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
+{
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(pasid);
+ u64 generation = atomic64_read(&info->generation);
+
+ if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK(info));
+
+ /*
+ * If our current ASID was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (check_update_reserved_asid(info, asid, newasid))
+ return newasid;
+
+ /*
+ * We had a valid ASID in a previous life, so try to re-use
+ * it if possible.
+ */
+ if (!__test_and_set_bit(asid2idx(info, asid), info->map))
+ return newasid;
+ }
+
+ /*
+ * Allocate a free ASID. If we can't find one, take a note of the
+ * currently active ASIDs and mark the TLBs as requiring flushes. We
+ * always count from ASID #2 (index 1), as we use ASID #0 when setting
+ * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+ * pairs.
+ */
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
+ if (asid != NUM_CTXT_ASIDS(info))
+ goto set_asid;
+
+ /* We're out of ASIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
+ &info->generation);
+ flush_context(info);
+
+ /* We have more ASIDs than CPUs, so this will always succeed */
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
+
+set_asid:
+ __set_bit(asid, info->map);
+ cur_idx = asid;
+ return idx2asid(info, asid) | generation;
+}
+
+/*
+ * Generate a new ASID for the context.
+ *
+ * @pasid: Pointer to the current ASID batch allocated. It will be updated
+ * with the new ASID batch.
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu)
+{
+ unsigned long flags;
+ u64 asid;
+
+ raw_spin_lock_irqsave(&info->lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(pasid);
+ if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
+ asid = new_context(info, pasid);
+ atomic64_set(pasid, asid);
+ }
+
+ if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
+ info->flush_cpu_ctxt_cb();
+
+ atomic64_set(&active_asid(info, cpu), asid);
+ raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/*
+ * Initialize the ASID allocator
+ *
+ * @info: Pointer to the asid allocator structure
+ * @bits: Number of ASIDs available
+ * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
+ * allocated contiguously for a given context. This value should be a power of
+ * 2.
+ */
+int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void))
+{
+ info->bits = bits;
+ info->ctxt_shift = ilog2(asid_per_ctxt);
+ info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
+ /*
+ * Expect allocation after rollover to fail if we don't have at least
+ * one more ASID than CPUs. ASID #0 is always reserved.
+ */
+ WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
+ atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
+ info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
+ sizeof(*info->map), GFP_KERNEL);
+ if (!info->map)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&info->lock);
+
+ return 0;
+}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 678a57b77c91..95ee7711a2ef 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -22,47 +22,22 @@
#include <linux/slab.h>
#include <linux/mm.h>
+#include <asm/asid.h>
#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
-struct asid_info
-{
- atomic64_t generation;
- unsigned long *map;
- atomic64_t __percpu *active;
- u64 __percpu *reserved;
- u32 bits;
- raw_spinlock_t lock;
- /* Which CPU requires context flush on next call */
- cpumask_t flush_pending;
- /* Number of ASID allocated by context (shift value) */
- unsigned int ctxt_shift;
- /* Callback to locally flush the context. */
- void (*flush_cpu_ctxt_cb)(void);
-} asid_info;
-
-#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
-#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
-
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
-#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
-#define NUM_ASIDS(info) (1UL << ((info)->bits))
-
-#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
-
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define ASID_PER_CONTEXT 2
#else
#define ASID_PER_CONTEXT 1
#endif
-#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
-#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
-#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+struct asid_info asid_info;
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
@@ -102,178 +77,6 @@ void verify_cpu_asid_bits(void)
}
}
-static void flush_context(struct asid_info *info)
-{
- int i;
- u64 asid;
-
- /* Update the list of reserved ASIDs and the ASID bitmap. */
- bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
-
- for_each_possible_cpu(i) {
- asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
- /*
- * If this CPU has already been through a
- * rollover, but hasn't run another task in
- * the meantime, we must preserve its reserved
- * ASID, as this is the only trace we have of
- * the process it is still running.
- */
- if (asid == 0)
- asid = reserved_asid(info, i);
- __set_bit(asid2idx(info, asid), info->map);
- reserved_asid(info, i) = asid;
- }
-
- /*
- * Queue a TLB invalidation for each CPU to perform on next
- * context-switch
- */
- cpumask_setall(&info->flush_pending);
-}
-
-static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
- u64 newasid)
-{
- int cpu;
- bool hit = false;
-
- /*
- * Iterate over the set of reserved ASIDs looking for a match.
- * If we find one, then we can update our mm to use newasid
- * (i.e. the same ASID in the current generation) but we can't
- * exit the loop early, since we need to ensure that all copies
- * of the old ASID are updated to reflect the mm. Failure to do
- * so could result in us missing the reserved ASID in a future
- * generation.
- */
- for_each_possible_cpu(cpu) {
- if (reserved_asid(info, cpu) == asid) {
- hit = true;
- reserved_asid(info, cpu) = newasid;
- }
- }
-
- return hit;
-}
-
-static u64 new_context(struct asid_info *info, atomic64_t *pasid)
-{
- static u32 cur_idx = 1;
- u64 asid = atomic64_read(pasid);
- u64 generation = atomic64_read(&info->generation);
-
- if (asid != 0) {
- u64 newasid = generation | (asid & ~ASID_MASK(info));
-
- /*
- * If our current ASID was active during a rollover, we
- * can continue to use it and this was just a false alarm.
- */
- if (check_update_reserved_asid(info, asid, newasid))
- return newasid;
-
- /*
- * We had a valid ASID in a previous life, so try to re-use
- * it if possible.
- */
- if (!__test_and_set_bit(asid2idx(info, asid), info->map))
- return newasid;
- }
-
- /*
- * Allocate a free ASID. If we can't find one, take a note of the
- * currently active ASIDs and mark the TLBs as requiring flushes. We
- * always count from ASID #2 (index 1), as we use ASID #0 when setting
- * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
- * pairs.
- */
- asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
- if (asid != NUM_CTXT_ASIDS(info))
- goto set_asid;
-
- /* We're out of ASIDs, so increment the global generation count */
- generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
- &info->generation);
- flush_context(info);
-
- /* We have more ASIDs than CPUs, so this will always succeed */
- asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
-
-set_asid:
- __set_bit(asid, info->map);
- cur_idx = asid;
- return idx2asid(info, asid) | generation;
-}
-
-static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu);
-
-/*
- * Check the ASID is still valid for the context. If not generate a new ASID.
- *
- * @pasid: Pointer to the current ASID batch
- * @cpu: current CPU ID. Must have been acquired throught get_cpu()
- */
-static void asid_check_context(struct asid_info *info,
- atomic64_t *pasid, unsigned int cpu)
-{
- u64 asid, old_active_asid;
-
- asid = atomic64_read(pasid);
-
- /*
- * The memory ordering here is subtle.
- * If our active_asid is non-zero and the ASID matches the current
- * generation, then we update the active_asid entry with a relaxed
- * cmpxchg. Racing with a concurrent rollover means that either:
- *
- * - We get a zero back from the cmpxchg and end up waiting on the
- * lock. Taking the lock synchronises with the rollover and so
- * we are forced to see the updated generation.
- *
- * - We get a valid ASID back from the cmpxchg, which means the
- * relaxed xchg in flush_context will treat us as reserved
- * because atomic RmWs are totally ordered for a given location.
- */
- old_active_asid = atomic64_read(&active_asid(info, cpu));
- if (old_active_asid &&
- !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
- atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
- old_active_asid, asid))
- return;
-
- asid_new_context(info, pasid, cpu);
-}
-
-/*
- * Generate a new ASID for the context.
- *
- * @pasid: Pointer to the current ASID batch allocated. It will be updated
- * with the new ASID batch.
- * @cpu: current CPU ID. Must have been acquired through get_cpu()
- */
-static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu)
-{
- unsigned long flags;
- u64 asid;
-
- raw_spin_lock_irqsave(&info->lock, flags);
- /* Check that our ASID belongs to the current generation. */
- asid = atomic64_read(pasid);
- if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
- asid = new_context(info, pasid);
- atomic64_set(pasid, asid);
- }
-
- if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
- info->flush_cpu_ctxt_cb();
-
- atomic64_set(&active_asid(info, cpu), asid);
- raw_spin_unlock_irqrestore(&info->lock, flags);
-}
-
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
if (system_supports_cnp())
@@ -305,38 +108,6 @@ static void asid_flush_cpu_ctxt(void)
local_flush_tlb_all();
}
-/*
- * Initialize the ASID allocator
- *
- * @info: Pointer to the asid allocator structure
- * @bits: Number of ASIDs available
- * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
- * allocated contiguously for a given context. This value should be a power of
- * 2.
- */
-static int asid_allocator_init(struct asid_info *info,
- u32 bits, unsigned int asid_per_ctxt,
- void (*flush_cpu_ctxt_cb)(void))
-{
- info->bits = bits;
- info->ctxt_shift = ilog2(asid_per_ctxt);
- info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
- /*
- * Expect allocation after rollover to fail if we don't have at least
- * one more ASID than CPUs. ASID #0 is always reserved.
- */
- WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
- atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
- info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
- sizeof(*info->map), GFP_KERNEL);
- if (!info->map)
- return -ENOMEM;
-
- raw_spin_lock_init(&info->lock);
-
- return 0;
-}
-
static int asids_init(void)
{
u32 bits = get_cpu_asid_bits();
@@ -344,7 +115,7 @@ static int asids_init(void)
if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
asid_flush_cpu_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
- 1UL << bits);
+ NUM_ASIDS(&asid_info));
asid_info.active = &active_asids;
asid_info.reserved = &reserved_asids;
--
2.11.0
Flushing the local context will vary depending on the actual user of the ASID
allocator. Introduce a new callback to flush the local context and move
the call to flush local TLB in it.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index cbf1c24cb3ee..678a57b77c91 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,6 +39,8 @@ struct asid_info
cpumask_t flush_pending;
/* Number of ASID allocated by context (shift value) */
unsigned int ctxt_shift;
+ /* Callback to locally flush the context. */
+ void (*flush_cpu_ctxt_cb)(void);
} asid_info;
#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -266,7 +268,7 @@ static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
}
if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
- local_flush_tlb_all();
+ info->flush_cpu_ctxt_cb();
atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&info->lock, flags);
@@ -298,6 +300,11 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
}
+static void asid_flush_cpu_ctxt(void)
+{
+ local_flush_tlb_all();
+}
+
/*
* Initialize the ASID allocator
*
@@ -308,10 +315,12 @@ asmlinkage void post_ttbr_update_workaround(void)
* 2.
*/
static int asid_allocator_init(struct asid_info *info,
- u32 bits, unsigned int asid_per_ctxt)
+ u32 bits, unsigned int asid_per_ctxt,
+ void (*flush_cpu_ctxt_cb)(void))
{
info->bits = bits;
info->ctxt_shift = ilog2(asid_per_ctxt);
+ info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
/*
* Expect allocation after rollover to fail if we don't have at least
* one more ASID than CPUs. ASID #0 is always reserved.
@@ -332,7 +341,8 @@ static int asids_init(void)
{
u32 bits = get_cpu_asid_bits();
- if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT))
+ if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
+ asid_flush_cpu_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
1UL << bits);
--
2.11.0
Move out the common initialization of the ASID allocator in a separate
function.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 43 +++++++++++++++++++++++++++++++------------
1 file changed, 31 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index fb13bc249951..b071a1b3469e 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -271,31 +271,50 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
}
-static int asids_init(void)
+/*
+ * Initialize the ASID allocator
+ *
+ * @info: Pointer to the asid allocator structure
+ * @bits: Number of ASIDs available
+ * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
+ * allocated contiguously for a given context. This value should be a power of
+ * 2.
+ */
+static int asid_allocator_init(struct asid_info *info,
+ u32 bits, unsigned int asid_per_ctxt)
{
- struct asid_info *info = &asid_info;
-
- info->bits = get_cpu_asid_bits();
- info->ctxt_shift = ilog2(ASID_PER_CONTEXT);
+ info->bits = bits;
+ info->ctxt_shift = ilog2(asid_per_ctxt);
/*
* Expect allocation after rollover to fail if we don't have at least
- * one more ASID than CPUs. ASID #0 is reserved for init_mm.
+ * one more ASID than CPUs. ASID #0 is always reserved.
*/
WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
sizeof(*info->map), GFP_KERNEL);
if (!info->map)
- panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_CTXT_ASIDS(info));
-
- info->active = &active_asids;
- info->reserved = &reserved_asids;
+ return -ENOMEM;
raw_spin_lock_init(&info->lock);
+ return 0;
+}
+
+static int asids_init(void)
+{
+ u32 bits = get_cpu_asid_bits();
+
+ if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT))
+ panic("Unable to initialize ASID allocator for %lu ASIDs\n",
+ 1UL << bits);
+
+ asid_info.active = &active_asids;
+ asid_info.reserved = &reserved_asids;
+
pr_info("ASID allocator initialised with %lu entries\n",
- NUM_CTXT_ASIDS(info));
+ NUM_CTXT_ASIDS(&asid_info));
+
return 0;
}
early_initcall(asids_init);
--
2.11.0
The function new_context will be part of a generic ASID allocator. At
the moment, the MM structure is only used to fetch the ASID.
To remove the dependency on MM, it is possible to just pass a pointer to
the current ASID.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e98ab348b9cb..488845c39c39 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -151,10 +151,10 @@ static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
return hit;
}
-static u64 new_context(struct asid_info *info, struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
{
static u32 cur_idx = 1;
- u64 asid = atomic64_read(&mm->context.id);
+ u64 asid = atomic64_read(pasid);
u64 generation = atomic64_read(&info->generation);
if (asid != 0) {
@@ -236,7 +236,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
- asid = new_context(info, mm);
+ asid = new_context(info, &mm->context.id);
atomic64_set(&mm->context.id, asid);
}
--
2.11.0
Currently the number of ASID allocated per context is determined at
compilation time. As the algorithm is becoming generic, the user may
want to instantiate the ASID allocator multiple time with different
number of ASID allocated.
Add a field in asid_info to track the number ASID allocated per context.
This is stored in term of shift amount to avoid division in the code.
This means the number of ASID allocated per context should be a power of
two.
At the same time rename NUM_USERS_ASIDS to NUM_CTXT_ASIDS to make the
name more generic.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 488845c39c39..5a4c2b1aac71 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -37,6 +37,8 @@ struct asid_info
raw_spinlock_t lock;
/* Which CPU requires context flush on next call */
cpumask_t flush_pending;
+ /* Number of ASID allocated by context (shift value) */
+ unsigned int ctxt_shift;
} asid_info;
#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -49,15 +51,15 @@ static DEFINE_PER_CPU(u64, reserved_asids);
#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS(info) (ASID_FIRST_VERSION(info) >> 1)
-#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> 1)
-#define idx2asid(info, idx) (((idx) << 1) & ~ASID_MASK(info))
+#define ASID_PER_CONTEXT 2
#else
-#define NUM_USER_ASIDS(info) (ASID_FIRST_VERSION(info))
-#define asid2idx(info, asid) ((asid) & ~ASID_MASK(info))
-#define idx2asid(info, idx) asid2idx(info, idx)
+#define ASID_PER_CONTEXT 1
#endif
+#define NUM_CTXT_ASIDS(info) (ASID_FIRST_VERSION(info) >> (info)->ctxt_shift)
+#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
+#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
{
@@ -102,7 +104,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
/* Update the list of reserved ASIDs and the ASID bitmap. */
- bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
+ bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -182,8 +184,8 @@ static u64 new_context(struct asid_info *info, atomic64_t *pasid)
* a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
* pairs.
*/
- asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
- if (asid != NUM_USER_ASIDS(info))
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
+ if (asid != NUM_CTXT_ASIDS(info))
goto set_asid;
/* We're out of ASIDs, so increment the global generation count */
@@ -192,7 +194,7 @@ static u64 new_context(struct asid_info *info, atomic64_t *pasid)
flush_context(info);
/* We have more ASIDs than CPUs, so this will always succeed */
- asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), 1);
+ asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
set_asid:
__set_bit(asid, info->map);
@@ -272,17 +274,18 @@ static int asids_init(void)
struct asid_info *info = &asid_info;
info->bits = get_cpu_asid_bits();
+ info->ctxt_shift = ilog2(ASID_PER_CONTEXT);
/*
* Expect allocation after rollover to fail if we don't have at least
* one more ASID than CPUs. ASID #0 is reserved for init_mm.
*/
- WARN_ON(NUM_USER_ASIDS(info) - 1 <= num_possible_cpus());
+ WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
- info->map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS(info)),
+ info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
sizeof(*info->map), GFP_KERNEL);
if (!info->map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_USER_ASIDS(info));
+ NUM_CTXT_ASIDS(info));
info->active = &active_asids;
info->reserved = &reserved_asids;
@@ -290,7 +293,7 @@ static int asids_init(void)
raw_spin_lock_init(&info->lock);
pr_info("ASID allocator initialised with %lu entries\n",
- NUM_USER_ASIDS(info));
+ NUM_CTXT_ASIDS(info));
return 0;
}
early_initcall(asids_init);
--
2.11.0
The variables active_asids and reserved_asids hold information for a
given ASID allocator. So move them to the structure asid_info.
At the same time, introduce wrappers to access the active and reserved
ASIDs to make the code clearer.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 34 ++++++++++++++++++++++------------
1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 34db54f1a39a..cfe4c5f7abf3 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -34,10 +34,16 @@ struct asid_info
{
atomic64_t generation;
unsigned long *map;
+ atomic64_t __percpu *active;
+ u64 __percpu *reserved;
} asid_info;
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
+
static cpumask_t tlb_flush_pending;
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
@@ -100,7 +106,7 @@ static void flush_context(struct asid_info *info)
bitmap_clear(info->map, 0, NUM_USER_ASIDS);
for_each_possible_cpu(i) {
- asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
+ asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
/*
* If this CPU has already been through a
* rollover, but hasn't run another task in
@@ -109,9 +115,9 @@ static void flush_context(struct asid_info *info)
* the process it is still running.
*/
if (asid == 0)
- asid = per_cpu(reserved_asids, i);
+ asid = reserved_asid(info, i);
__set_bit(asid2idx(asid), info->map);
- per_cpu(reserved_asids, i) = asid;
+ reserved_asid(info, i) = asid;
}
/*
@@ -121,7 +127,8 @@ static void flush_context(struct asid_info *info)
cpumask_setall(&tlb_flush_pending);
}
-static bool check_update_reserved_asid(u64 asid, u64 newasid)
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+ u64 newasid)
{
int cpu;
bool hit = false;
@@ -136,9 +143,9 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid)
* generation.
*/
for_each_possible_cpu(cpu) {
- if (per_cpu(reserved_asids, cpu) == asid) {
+ if (reserved_asid(info, cpu) == asid) {
hit = true;
- per_cpu(reserved_asids, cpu) = newasid;
+ reserved_asid(info, cpu) = newasid;
}
}
@@ -158,7 +165,7 @@ static u64 new_context(struct asid_info *info, struct mm_struct *mm)
* If our current ASID was active during a rollover, we
* can continue to use it and this was just a false alarm.
*/
- if (check_update_reserved_asid(asid, newasid))
+ if (check_update_reserved_asid(info, asid, newasid))
return newasid;
/*
@@ -207,8 +214,8 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
/*
* The memory ordering here is subtle.
- * If our active_asids is non-zero and the ASID matches the current
- * generation, then we update the active_asids entry with a relaxed
+ * If our active_asid is non-zero and the ASID matches the current
+ * generation, then we update the active_asid entry with a relaxed
* cmpxchg. Racing with a concurrent rollover means that either:
*
* - We get a zero back from the cmpxchg and end up waiting on the
@@ -219,10 +226,10 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
* relaxed xchg in flush_context will treat us as reserved
* because atomic RmWs are totally ordered for a given location.
*/
- old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+ old_active_asid = atomic64_read(&active_asid(info, cpu));
if (old_active_asid &&
!((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
- atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+ atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
old_active_asid, asid))
goto switch_mm_fastpath;
@@ -237,7 +244,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
local_flush_tlb_all();
- atomic64_set(&per_cpu(active_asids, cpu), asid);
+ atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
@@ -278,6 +285,9 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
+ info->active = &active_asids;
+ info->reserved = &reserved_asids;
+
pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
--
2.11.0
The variables lock and tlb_flush_pending holds information for a given
ASID allocator. So move them to the asid_info structure.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index da17ed6c7117..e98ab348b9cb 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,8 +27,6 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-
struct asid_info
{
atomic64_t generation;
@@ -36,6 +34,9 @@ struct asid_info
atomic64_t __percpu *active;
u64 __percpu *reserved;
u32 bits;
+ raw_spinlock_t lock;
+ /* Which CPU requires context flush on next call */
+ cpumask_t flush_pending;
} asid_info;
#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -44,8 +45,6 @@ struct asid_info
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
-static cpumask_t tlb_flush_pending;
-
#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
@@ -124,7 +123,7 @@ static void flush_context(struct asid_info *info)
* Queue a TLB invalidation for each CPU to perform on next
* context-switch
*/
- cpumask_setall(&tlb_flush_pending);
+ cpumask_setall(&info->flush_pending);
}
static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
@@ -233,7 +232,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
old_active_asid, asid))
goto switch_mm_fastpath;
- raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
@@ -241,11 +240,11 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
atomic64_set(&mm->context.id, asid);
}
- if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
local_flush_tlb_all();
atomic64_set(&active_asid(info, cpu), asid);
- raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+ raw_spin_unlock_irqrestore(&info->lock, flags);
switch_mm_fastpath:
@@ -288,6 +287,8 @@ static int asids_init(void)
info->active = &active_asids;
info->reserved = &reserved_asids;
+ raw_spin_lock_init(&info->lock);
+
pr_info("ASID allocator initialised with %lu entries\n",
NUM_USER_ASIDS(info));
return 0;
--
2.11.0
The variable bits hold information for a given ASID allocator. So move
it to the asid_info structure.
Because most of the macros were relying on bits, they are now taking an
extra parameter that is a pointer to the asid_info structure.
Signed-off-by: Julien Grall <[email protected]>
---
arch/arm64/mm/context.c | 59 +++++++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 29 deletions(-)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index cfe4c5f7abf3..da17ed6c7117 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,7 +27,6 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
-static u32 asid_bits;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
struct asid_info
@@ -36,6 +35,7 @@ struct asid_info
unsigned long *map;
atomic64_t __percpu *active;
u64 __percpu *reserved;
+ u32 bits;
} asid_info;
#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -46,17 +46,17 @@ static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;
-#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
-#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
-#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
-#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS(info) (ASID_FIRST_VERSION(info) >> 1)
+#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> 1)
+#define idx2asid(info, idx) (((idx) << 1) & ~ASID_MASK(info))
#else
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
-#define asid2idx(asid) ((asid) & ~ASID_MASK)
-#define idx2asid(idx) asid2idx(idx)
+#define NUM_USER_ASIDS(info) (ASID_FIRST_VERSION(info))
+#define asid2idx(info, asid) ((asid) & ~ASID_MASK(info))
+#define idx2asid(info, idx) asid2idx(info, idx)
#endif
/* Get the ASIDBits supported by the current CPU */
@@ -86,13 +86,13 @@ void verify_cpu_asid_bits(void)
{
u32 asid = get_cpu_asid_bits();
- if (asid < asid_bits) {
+ if (asid < asid_info.bits) {
/*
* We cannot decrease the ASID size at runtime, so panic if we support
* fewer ASID bits than the boot CPU.
*/
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
- smp_processor_id(), asid, asid_bits);
+ smp_processor_id(), asid, asid_info.bits);
cpu_panic_kernel();
}
}
@@ -103,7 +103,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
/* Update the list of reserved ASIDs and the ASID bitmap. */
- bitmap_clear(info->map, 0, NUM_USER_ASIDS);
+ bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -116,7 +116,7 @@ static void flush_context(struct asid_info *info)
*/
if (asid == 0)
asid = reserved_asid(info, i);
- __set_bit(asid2idx(asid), info->map);
+ __set_bit(asid2idx(info, asid), info->map);
reserved_asid(info, i) = asid;
}
@@ -159,7 +159,7 @@ static u64 new_context(struct asid_info *info, struct mm_struct *mm)
u64 generation = atomic64_read(&info->generation);
if (asid != 0) {
- u64 newasid = generation | (asid & ~ASID_MASK);
+ u64 newasid = generation | (asid & ~ASID_MASK(info));
/*
* If our current ASID was active during a rollover, we
@@ -172,7 +172,7 @@ static u64 new_context(struct asid_info *info, struct mm_struct *mm)
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
- if (!__test_and_set_bit(asid2idx(asid), info->map))
+ if (!__test_and_set_bit(asid2idx(info, asid), info->map))
return newasid;
}
@@ -183,22 +183,22 @@ static u64 new_context(struct asid_info *info, struct mm_struct *mm)
* a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
* pairs.
*/
- asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
- if (asid != NUM_USER_ASIDS)
+ asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
+ if (asid != NUM_USER_ASIDS(info))
goto set_asid;
/* We're out of ASIDs, so increment the global generation count */
- generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
&info->generation);
flush_context(info);
/* We have more ASIDs than CPUs, so this will always succeed */
- asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, 1);
+ asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), 1);
set_asid:
__set_bit(asid, info->map);
cur_idx = asid;
- return idx2asid(asid) | generation;
+ return idx2asid(info, asid) | generation;
}
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
@@ -228,7 +228,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
*/
old_active_asid = atomic64_read(&active_asid(info, cpu));
if (old_active_asid &&
- !((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
+ !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
old_active_asid, asid))
goto switch_mm_fastpath;
@@ -236,7 +236,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
- if ((asid ^ atomic64_read(&info->generation)) >> asid_bits) {
+ if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
asid = new_context(info, mm);
atomic64_set(&mm->context.id, asid);
}
@@ -272,23 +272,24 @@ static int asids_init(void)
{
struct asid_info *info = &asid_info;
- asid_bits = get_cpu_asid_bits();
+ info->bits = get_cpu_asid_bits();
/*
* Expect allocation after rollover to fail if we don't have at least
* one more ASID than CPUs. ASID #0 is reserved for init_mm.
*/
- WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
- atomic64_set(&info->generation, ASID_FIRST_VERSION);
- info->map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*info->map),
- GFP_KERNEL);
+ WARN_ON(NUM_USER_ASIDS(info) - 1 <= num_possible_cpus());
+ atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
+ info->map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS(info)),
+ sizeof(*info->map), GFP_KERNEL);
if (!info->map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_USER_ASIDS);
+ NUM_USER_ASIDS(info));
info->active = &active_asids;
info->reserved = &reserved_asids;
- pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+ pr_info("ASID allocator initialised with %lu entries\n",
+ NUM_USER_ASIDS(info));
return 0;
}
early_initcall(asids_init);
--
2.11.0
Hi Julien,
On 21/03/2019 16:36, Julien Grall wrote:
> In an attempt to make the ASID allocator generic, create a new structure
> asid_info to store all the information necessary for the allocator.
>
> For now, move the variables asid_generation and asid_map to the new structure
> asid_info. Follow-up patches will move more variables.
>
> Note to avoid more renaming aftwards, a local variable 'info' has been
> created and is a pointer to the ASID allocator structure.
>
> Signed-off-by: Julien Grall <[email protected]>
> ---
> arch/arm64/mm/context.c | 46 ++++++++++++++++++++++++++--------------------
> 1 file changed, 26 insertions(+), 20 deletions(-)
>
> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> index 1f0ea2facf24..34db54f1a39a 100644
> --- a/arch/arm64/mm/context.c
> +++ b/arch/arm64/mm/context.c
> @@ -30,8 +30,11 @@
> static u32 asid_bits;
> static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
>
> -static atomic64_t asid_generation;
> -static unsigned long *asid_map;
> +struct asid_info
> +{
> + atomic64_t generation;
> + unsigned long *map;
> +} asid_info;
Shouldn't this be static ? Rest looks fine.
Cheers
Suzuki
On 3/21/19 5:03 PM, Suzuki K Poulose wrote:
> Hi Julien,
Hi Suzuki,
> On 21/03/2019 16:36, Julien Grall wrote:
>> In an attempt to make the ASID allocator generic, create a new structure
>> asid_info to store all the information necessary for the allocator.
>>
>> For now, move the variables asid_generation and asid_map to the new
>> structure
>> asid_info. Follow-up patches will move more variables.
>>
>> Note to avoid more renaming aftwards, a local variable 'info' has been
>> created and is a pointer to the ASID allocator structure.
>>
>> Signed-off-by: Julien Grall <[email protected]>
>> ---
>> arch/arm64/mm/context.c | 46
>> ++++++++++++++++++++++++++--------------------
>> 1 file changed, 26 insertions(+), 20 deletions(-)
>>
>> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
>> index 1f0ea2facf24..34db54f1a39a 100644
>> --- a/arch/arm64/mm/context.c
>> +++ b/arch/arm64/mm/context.c
>> @@ -30,8 +30,11 @@
>> static u32 asid_bits;
>> static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
>> -static atomic64_t asid_generation;
>> -static unsigned long *asid_map;
>> +struct asid_info
>> +{
>> + atomic64_t generation;
>> + unsigned long *map;
>> +} asid_info;
>
> Shouldn't this be static ? Rest looks fine.
Yes it should be static. I have updated my code.
Thank you for the review!
Cheers,
>
> Cheers
> Suzuki
--
Julien Grall
Hi,
I am CCing RISC-V folks to see if there are an interest to share the code.
@RISC-V: I noticed you are discussing about importing a version of ASID
allocator in RISC-V. At a first look, the code looks quite similar. Would the
library below helps you?
Cheers,
On 21/03/2019 16:36, Julien Grall wrote:
> We will want to re-use the ASID allocator in a separate context (e.g
> allocating VMID). So move the code in a new file.
>
> The function asid_check_context has been moved in the header as a static
> inline function because we want to avoid add a branch when checking if the
> ASID is still valid.
>
> Signed-off-by: Julien Grall <[email protected]>
>
> ---
>
> This code will be used in the virt code for allocating VMID. I am not
> entirely sure where to place it. Lib could potentially be a good place but I
> am not entirely convinced the algo as it is could be used by other
> architecture.
>
> Looking at x86, it seems that it will not be possible to re-use because
> the number of PCID (aka ASID) could be smaller than the number of CPUs.
> See commit message 10af6235e0d327d42e1bad974385197817923dc1 "x86/mm:
> Implement PCID based optimization: try to preserve old TLB entries using
> PCI".
> ---
> arch/arm64/include/asm/asid.h | 77 ++++++++++++++
> arch/arm64/lib/Makefile | 2 +
> arch/arm64/lib/asid.c | 185 +++++++++++++++++++++++++++++++++
> arch/arm64/mm/context.c | 235 +-----------------------------------------
> 4 files changed, 267 insertions(+), 232 deletions(-)
> create mode 100644 arch/arm64/include/asm/asid.h
> create mode 100644 arch/arm64/lib/asid.c
>
> diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
> new file mode 100644
> index 000000000000..bb62b587f37f
> --- /dev/null
> +++ b/arch/arm64/include/asm/asid.h
> @@ -0,0 +1,77 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_ASM_ASID_H
> +#define __ASM_ASM_ASID_H
> +
> +#include <linux/atomic.h>
> +#include <linux/compiler.h>
> +#include <linux/cpumask.h>
> +#include <linux/percpu.h>
> +#include <linux/spinlock.h>
> +
> +struct asid_info
> +{
> + atomic64_t generation;
> + unsigned long *map;
> + atomic64_t __percpu *active;
> + u64 __percpu *reserved;
> + u32 bits;
> + /* Lock protecting the structure */
> + raw_spinlock_t lock;
> + /* Which CPU requires context flush on next call */
> + cpumask_t flush_pending;
> + /* Number of ASID allocated by context (shift value) */
> + unsigned int ctxt_shift;
> + /* Callback to locally flush the context. */
> + void (*flush_cpu_ctxt_cb)(void);
> +};
> +
> +#define NUM_ASIDS(info) (1UL << ((info)->bits))
> +#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
> +
> +#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
> +
> +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> + unsigned int cpu);
> +
> +/*
> + * Check the ASID is still valid for the context. If not generate a new ASID.
> + *
> + * @pasid: Pointer to the current ASID batch
> + * @cpu: current CPU ID. Must have been acquired throught get_cpu()
> + */
> +static inline void asid_check_context(struct asid_info *info,
> + atomic64_t *pasid, unsigned int cpu)
> +{
> + u64 asid, old_active_asid;
> +
> + asid = atomic64_read(pasid);
> +
> + /*
> + * The memory ordering here is subtle.
> + * If our active_asid is non-zero and the ASID matches the current
> + * generation, then we update the active_asid entry with a relaxed
> + * cmpxchg. Racing with a concurrent rollover means that either:
> + *
> + * - We get a zero back from the cmpxchg and end up waiting on the
> + * lock. Taking the lock synchronises with the rollover and so
> + * we are forced to see the updated generation.
> + *
> + * - We get a valid ASID back from the cmpxchg, which means the
> + * relaxed xchg in flush_context will treat us as reserved
> + * because atomic RmWs are totally ordered for a given location.
> + */
> + old_active_asid = atomic64_read(&active_asid(info, cpu));
> + if (old_active_asid &&
> + !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
> + atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
> + old_active_asid, asid))
> + return;
> +
> + asid_new_context(info, pasid, cpu);
> +}
> +
> +int asid_allocator_init(struct asid_info *info,
> + u32 bits, unsigned int asid_per_ctxt,
> + void (*flush_cpu_ctxt_cb)(void));
> +
> +#endif
> diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
> index 5540a1638baf..720df5ee2aa2 100644
> --- a/arch/arm64/lib/Makefile
> +++ b/arch/arm64/lib/Makefile
> @@ -5,6 +5,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \
> memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
> strchr.o strrchr.o tishift.o
>
> +lib-y += asid.o
> +
> ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
> obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
> CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
> diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
> new file mode 100644
> index 000000000000..72b71bfb32be
> --- /dev/null
> +++ b/arch/arm64/lib/asid.c
> @@ -0,0 +1,185 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Generic ASID allocator.
> + *
> + * Based on arch/arm/mm/context.c
> + *
> + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
> + * Copyright (C) 2012 ARM Ltd.
> + */
> +
> +#include <linux/slab.h>
> +
> +#include <asm/asid.h>
> +
> +#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
> +
> +#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
> +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
> +
> +#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
> +#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
> +
> +static void flush_context(struct asid_info *info)
> +{
> + int i;
> + u64 asid;
> +
> + /* Update the list of reserved ASIDs and the ASID bitmap. */
> + bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
> +
> + for_each_possible_cpu(i) {
> + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
> + /*
> + * If this CPU has already been through a
> + * rollover, but hasn't run another task in
> + * the meantime, we must preserve its reserved
> + * ASID, as this is the only trace we have of
> + * the process it is still running.
> + */
> + if (asid == 0)
> + asid = reserved_asid(info, i);
> + __set_bit(asid2idx(info, asid), info->map);
> + reserved_asid(info, i) = asid;
> + }
> +
> + /*
> + * Queue a TLB invalidation for each CPU to perform on next
> + * context-switch
> + */
> + cpumask_setall(&info->flush_pending);
> +}
> +
> +static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
> + u64 newasid)
> +{
> + int cpu;
> + bool hit = false;
> +
> + /*
> + * Iterate over the set of reserved ASIDs looking for a match.
> + * If we find one, then we can update our mm to use newasid
> + * (i.e. the same ASID in the current generation) but we can't
> + * exit the loop early, since we need to ensure that all copies
> + * of the old ASID are updated to reflect the mm. Failure to do
> + * so could result in us missing the reserved ASID in a future
> + * generation.
> + */
> + for_each_possible_cpu(cpu) {
> + if (reserved_asid(info, cpu) == asid) {
> + hit = true;
> + reserved_asid(info, cpu) = newasid;
> + }
> + }
> +
> + return hit;
> +}
> +
> +static u64 new_context(struct asid_info *info, atomic64_t *pasid)
> +{
> + static u32 cur_idx = 1;
> + u64 asid = atomic64_read(pasid);
> + u64 generation = atomic64_read(&info->generation);
> +
> + if (asid != 0) {
> + u64 newasid = generation | (asid & ~ASID_MASK(info));
> +
> + /*
> + * If our current ASID was active during a rollover, we
> + * can continue to use it and this was just a false alarm.
> + */
> + if (check_update_reserved_asid(info, asid, newasid))
> + return newasid;
> +
> + /*
> + * We had a valid ASID in a previous life, so try to re-use
> + * it if possible.
> + */
> + if (!__test_and_set_bit(asid2idx(info, asid), info->map))
> + return newasid;
> + }
> +
> + /*
> + * Allocate a free ASID. If we can't find one, take a note of the
> + * currently active ASIDs and mark the TLBs as requiring flushes. We
> + * always count from ASID #2 (index 1), as we use ASID #0 when setting
> + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
> + * pairs.
> + */
> + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
> + if (asid != NUM_CTXT_ASIDS(info))
> + goto set_asid;
> +
> + /* We're out of ASIDs, so increment the global generation count */
> + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
> + &info->generation);
> + flush_context(info);
> +
> + /* We have more ASIDs than CPUs, so this will always succeed */
> + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
> +
> +set_asid:
> + __set_bit(asid, info->map);
> + cur_idx = asid;
> + return idx2asid(info, asid) | generation;
> +}
> +
> +/*
> + * Generate a new ASID for the context.
> + *
> + * @pasid: Pointer to the current ASID batch allocated. It will be updated
> + * with the new ASID batch.
> + * @cpu: current CPU ID. Must have been acquired through get_cpu()
> + */
> +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> + unsigned int cpu)
> +{
> + unsigned long flags;
> + u64 asid;
> +
> + raw_spin_lock_irqsave(&info->lock, flags);
> + /* Check that our ASID belongs to the current generation. */
> + asid = atomic64_read(pasid);
> + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
> + asid = new_context(info, pasid);
> + atomic64_set(pasid, asid);
> + }
> +
> + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
> + info->flush_cpu_ctxt_cb();
> +
> + atomic64_set(&active_asid(info, cpu), asid);
> + raw_spin_unlock_irqrestore(&info->lock, flags);
> +}
> +
> +/*
> + * Initialize the ASID allocator
> + *
> + * @info: Pointer to the asid allocator structure
> + * @bits: Number of ASIDs available
> + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
> + * allocated contiguously for a given context. This value should be a power of
> + * 2.
> + */
> +int asid_allocator_init(struct asid_info *info,
> + u32 bits, unsigned int asid_per_ctxt,
> + void (*flush_cpu_ctxt_cb)(void))
> +{
> + info->bits = bits;
> + info->ctxt_shift = ilog2(asid_per_ctxt);
> + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
> + /*
> + * Expect allocation after rollover to fail if we don't have at least
> + * one more ASID than CPUs. ASID #0 is always reserved.
> + */
> + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
> + atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
> + info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
> + sizeof(*info->map), GFP_KERNEL);
> + if (!info->map)
> + return -ENOMEM;
> +
> + raw_spin_lock_init(&info->lock);
> +
> + return 0;
> +}
> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> index 678a57b77c91..95ee7711a2ef 100644
> --- a/arch/arm64/mm/context.c
> +++ b/arch/arm64/mm/context.c
> @@ -22,47 +22,22 @@
> #include <linux/slab.h>
> #include <linux/mm.h>
>
> +#include <asm/asid.h>
> #include <asm/cpufeature.h>
> #include <asm/mmu_context.h>
> #include <asm/smp.h>
> #include <asm/tlbflush.h>
>
> -struct asid_info
> -{
> - atomic64_t generation;
> - unsigned long *map;
> - atomic64_t __percpu *active;
> - u64 __percpu *reserved;
> - u32 bits;
> - raw_spinlock_t lock;
> - /* Which CPU requires context flush on next call */
> - cpumask_t flush_pending;
> - /* Number of ASID allocated by context (shift value) */
> - unsigned int ctxt_shift;
> - /* Callback to locally flush the context. */
> - void (*flush_cpu_ctxt_cb)(void);
> -} asid_info;
> -
> -#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
> -#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
> -
> static DEFINE_PER_CPU(atomic64_t, active_asids);
> static DEFINE_PER_CPU(u64, reserved_asids);
>
> -#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
> -#define NUM_ASIDS(info) (1UL << ((info)->bits))
> -
> -#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
> -
> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> #define ASID_PER_CONTEXT 2
> #else
> #define ASID_PER_CONTEXT 1
> #endif
>
> -#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
> -#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
> -#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
> +struct asid_info asid_info;
>
> /* Get the ASIDBits supported by the current CPU */
> static u32 get_cpu_asid_bits(void)
> @@ -102,178 +77,6 @@ void verify_cpu_asid_bits(void)
> }
> }
>
> -static void flush_context(struct asid_info *info)
> -{
> - int i;
> - u64 asid;
> -
> - /* Update the list of reserved ASIDs and the ASID bitmap. */
> - bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
> -
> - for_each_possible_cpu(i) {
> - asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
> - /*
> - * If this CPU has already been through a
> - * rollover, but hasn't run another task in
> - * the meantime, we must preserve its reserved
> - * ASID, as this is the only trace we have of
> - * the process it is still running.
> - */
> - if (asid == 0)
> - asid = reserved_asid(info, i);
> - __set_bit(asid2idx(info, asid), info->map);
> - reserved_asid(info, i) = asid;
> - }
> -
> - /*
> - * Queue a TLB invalidation for each CPU to perform on next
> - * context-switch
> - */
> - cpumask_setall(&info->flush_pending);
> -}
> -
> -static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
> - u64 newasid)
> -{
> - int cpu;
> - bool hit = false;
> -
> - /*
> - * Iterate over the set of reserved ASIDs looking for a match.
> - * If we find one, then we can update our mm to use newasid
> - * (i.e. the same ASID in the current generation) but we can't
> - * exit the loop early, since we need to ensure that all copies
> - * of the old ASID are updated to reflect the mm. Failure to do
> - * so could result in us missing the reserved ASID in a future
> - * generation.
> - */
> - for_each_possible_cpu(cpu) {
> - if (reserved_asid(info, cpu) == asid) {
> - hit = true;
> - reserved_asid(info, cpu) = newasid;
> - }
> - }
> -
> - return hit;
> -}
> -
> -static u64 new_context(struct asid_info *info, atomic64_t *pasid)
> -{
> - static u32 cur_idx = 1;
> - u64 asid = atomic64_read(pasid);
> - u64 generation = atomic64_read(&info->generation);
> -
> - if (asid != 0) {
> - u64 newasid = generation | (asid & ~ASID_MASK(info));
> -
> - /*
> - * If our current ASID was active during a rollover, we
> - * can continue to use it and this was just a false alarm.
> - */
> - if (check_update_reserved_asid(info, asid, newasid))
> - return newasid;
> -
> - /*
> - * We had a valid ASID in a previous life, so try to re-use
> - * it if possible.
> - */
> - if (!__test_and_set_bit(asid2idx(info, asid), info->map))
> - return newasid;
> - }
> -
> - /*
> - * Allocate a free ASID. If we can't find one, take a note of the
> - * currently active ASIDs and mark the TLBs as requiring flushes. We
> - * always count from ASID #2 (index 1), as we use ASID #0 when setting
> - * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
> - * pairs.
> - */
> - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
> - if (asid != NUM_CTXT_ASIDS(info))
> - goto set_asid;
> -
> - /* We're out of ASIDs, so increment the global generation count */
> - generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
> - &info->generation);
> - flush_context(info);
> -
> - /* We have more ASIDs than CPUs, so this will always succeed */
> - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
> -
> -set_asid:
> - __set_bit(asid, info->map);
> - cur_idx = asid;
> - return idx2asid(info, asid) | generation;
> -}
> -
> -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> - unsigned int cpu);
> -
> -/*
> - * Check the ASID is still valid for the context. If not generate a new ASID.
> - *
> - * @pasid: Pointer to the current ASID batch
> - * @cpu: current CPU ID. Must have been acquired throught get_cpu()
> - */
> -static void asid_check_context(struct asid_info *info,
> - atomic64_t *pasid, unsigned int cpu)
> -{
> - u64 asid, old_active_asid;
> -
> - asid = atomic64_read(pasid);
> -
> - /*
> - * The memory ordering here is subtle.
> - * If our active_asid is non-zero and the ASID matches the current
> - * generation, then we update the active_asid entry with a relaxed
> - * cmpxchg. Racing with a concurrent rollover means that either:
> - *
> - * - We get a zero back from the cmpxchg and end up waiting on the
> - * lock. Taking the lock synchronises with the rollover and so
> - * we are forced to see the updated generation.
> - *
> - * - We get a valid ASID back from the cmpxchg, which means the
> - * relaxed xchg in flush_context will treat us as reserved
> - * because atomic RmWs are totally ordered for a given location.
> - */
> - old_active_asid = atomic64_read(&active_asid(info, cpu));
> - if (old_active_asid &&
> - !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
> - atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
> - old_active_asid, asid))
> - return;
> -
> - asid_new_context(info, pasid, cpu);
> -}
> -
> -/*
> - * Generate a new ASID for the context.
> - *
> - * @pasid: Pointer to the current ASID batch allocated. It will be updated
> - * with the new ASID batch.
> - * @cpu: current CPU ID. Must have been acquired through get_cpu()
> - */
> -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> - unsigned int cpu)
> -{
> - unsigned long flags;
> - u64 asid;
> -
> - raw_spin_lock_irqsave(&info->lock, flags);
> - /* Check that our ASID belongs to the current generation. */
> - asid = atomic64_read(pasid);
> - if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
> - asid = new_context(info, pasid);
> - atomic64_set(pasid, asid);
> - }
> -
> - if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
> - info->flush_cpu_ctxt_cb();
> -
> - atomic64_set(&active_asid(info, cpu), asid);
> - raw_spin_unlock_irqrestore(&info->lock, flags);
> -}
> -
> void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
> {
> if (system_supports_cnp())
> @@ -305,38 +108,6 @@ static void asid_flush_cpu_ctxt(void)
> local_flush_tlb_all();
> }
>
> -/*
> - * Initialize the ASID allocator
> - *
> - * @info: Pointer to the asid allocator structure
> - * @bits: Number of ASIDs available
> - * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
> - * allocated contiguously for a given context. This value should be a power of
> - * 2.
> - */
> -static int asid_allocator_init(struct asid_info *info,
> - u32 bits, unsigned int asid_per_ctxt,
> - void (*flush_cpu_ctxt_cb)(void))
> -{
> - info->bits = bits;
> - info->ctxt_shift = ilog2(asid_per_ctxt);
> - info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
> - /*
> - * Expect allocation after rollover to fail if we don't have at least
> - * one more ASID than CPUs. ASID #0 is always reserved.
> - */
> - WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
> - atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
> - info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
> - sizeof(*info->map), GFP_KERNEL);
> - if (!info->map)
> - return -ENOMEM;
> -
> - raw_spin_lock_init(&info->lock);
> -
> - return 0;
> -}
> -
> static int asids_init(void)
> {
> u32 bits = get_cpu_asid_bits();
> @@ -344,7 +115,7 @@ static int asids_init(void)
> if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
> asid_flush_cpu_ctxt))
> panic("Unable to initialize ASID allocator for %lu ASIDs\n",
> - 1UL << bits);
> + NUM_ASIDS(&asid_info));
>
> asid_info.active = &active_asids;
> asid_info.reserved = &reserved_asids;
>
--
Julien Grall
On Wed, 05 Jun 2019 09:56:03 PDT (-0700), [email protected] wrote:
> Hi,
>
> I am CCing RISC-V folks to see if there are an interest to share the code.
>
> @RISC-V: I noticed you are discussing about importing a version of ASID
> allocator in RISC-V. At a first look, the code looks quite similar. Would the
> library below helps you?
Thanks! I didn't look that closely at the original patches because the
argument against them was just "we don't have any way to test this".
Unfortunately, we don't have the constraint that there are more ASIDs than CPUs
in the system. As a result I don't think we can use this ASID allocation
strategy.
>
> Cheers,
>
> On 21/03/2019 16:36, Julien Grall wrote:
>> We will want to re-use the ASID allocator in a separate context (e.g
>> allocating VMID). So move the code in a new file.
>>
>> The function asid_check_context has been moved in the header as a static
>> inline function because we want to avoid add a branch when checking if the
>> ASID is still valid.
>>
>> Signed-off-by: Julien Grall <[email protected]>
>>
>> ---
>>
>> This code will be used in the virt code for allocating VMID. I am not
>> entirely sure where to place it. Lib could potentially be a good place but I
>> am not entirely convinced the algo as it is could be used by other
>> architecture.
>>
>> Looking at x86, it seems that it will not be possible to re-use because
>> the number of PCID (aka ASID) could be smaller than the number of CPUs.
>> See commit message 10af6235e0d327d42e1bad974385197817923dc1 "x86/mm:
>> Implement PCID based optimization: try to preserve old TLB entries using
>> PCI".
>> ---
>> arch/arm64/include/asm/asid.h | 77 ++++++++++++++
>> arch/arm64/lib/Makefile | 2 +
>> arch/arm64/lib/asid.c | 185 +++++++++++++++++++++++++++++++++
>> arch/arm64/mm/context.c | 235 +-----------------------------------------
>> 4 files changed, 267 insertions(+), 232 deletions(-)
>> create mode 100644 arch/arm64/include/asm/asid.h
>> create mode 100644 arch/arm64/lib/asid.c
>>
>> diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
>> new file mode 100644
>> index 000000000000..bb62b587f37f
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/asid.h
>> @@ -0,0 +1,77 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __ASM_ASM_ASID_H
>> +#define __ASM_ASM_ASID_H
>> +
>> +#include <linux/atomic.h>
>> +#include <linux/compiler.h>
>> +#include <linux/cpumask.h>
>> +#include <linux/percpu.h>
>> +#include <linux/spinlock.h>
>> +
>> +struct asid_info
>> +{
>> + atomic64_t generation;
>> + unsigned long *map;
>> + atomic64_t __percpu *active;
>> + u64 __percpu *reserved;
>> + u32 bits;
>> + /* Lock protecting the structure */
>> + raw_spinlock_t lock;
>> + /* Which CPU requires context flush on next call */
>> + cpumask_t flush_pending;
>> + /* Number of ASID allocated by context (shift value) */
>> + unsigned int ctxt_shift;
>> + /* Callback to locally flush the context. */
>> + void (*flush_cpu_ctxt_cb)(void);
>> +};
>> +
>> +#define NUM_ASIDS(info) (1UL << ((info)->bits))
>> +#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
>> +
>> +#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
>> +
>> +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
>> + unsigned int cpu);
>> +
>> +/*
>> + * Check the ASID is still valid for the context. If not generate a new ASID.
>> + *
>> + * @pasid: Pointer to the current ASID batch
>> + * @cpu: current CPU ID. Must have been acquired throught get_cpu()
>> + */
>> +static inline void asid_check_context(struct asid_info *info,
>> + atomic64_t *pasid, unsigned int cpu)
>> +{
>> + u64 asid, old_active_asid;
>> +
>> + asid = atomic64_read(pasid);
>> +
>> + /*
>> + * The memory ordering here is subtle.
>> + * If our active_asid is non-zero and the ASID matches the current
>> + * generation, then we update the active_asid entry with a relaxed
>> + * cmpxchg. Racing with a concurrent rollover means that either:
>> + *
>> + * - We get a zero back from the cmpxchg and end up waiting on the
>> + * lock. Taking the lock synchronises with the rollover and so
>> + * we are forced to see the updated generation.
>> + *
>> + * - We get a valid ASID back from the cmpxchg, which means the
>> + * relaxed xchg in flush_context will treat us as reserved
>> + * because atomic RmWs are totally ordered for a given location.
>> + */
>> + old_active_asid = atomic64_read(&active_asid(info, cpu));
>> + if (old_active_asid &&
>> + !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
>> + atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
>> + old_active_asid, asid))
>> + return;
>> +
>> + asid_new_context(info, pasid, cpu);
>> +}
>> +
>> +int asid_allocator_init(struct asid_info *info,
>> + u32 bits, unsigned int asid_per_ctxt,
>> + void (*flush_cpu_ctxt_cb)(void));
>> +
>> +#endif
>> diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
>> index 5540a1638baf..720df5ee2aa2 100644
>> --- a/arch/arm64/lib/Makefile
>> +++ b/arch/arm64/lib/Makefile
>> @@ -5,6 +5,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \
>> memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
>> strchr.o strrchr.o tishift.o
>>
>> +lib-y += asid.o
>> +
>> ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
>> obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
>> CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
>> diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
>> new file mode 100644
>> index 000000000000..72b71bfb32be
>> --- /dev/null
>> +++ b/arch/arm64/lib/asid.c
>> @@ -0,0 +1,185 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Generic ASID allocator.
>> + *
>> + * Based on arch/arm/mm/context.c
>> + *
>> + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
>> + * Copyright (C) 2012 ARM Ltd.
>> + */
>> +
>> +#include <linux/slab.h>
>> +
>> +#include <asm/asid.h>
>> +
>> +#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
>> +
>> +#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
>> +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
>> +
>> +#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
>> +#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
>> +
>> +static void flush_context(struct asid_info *info)
>> +{
>> + int i;
>> + u64 asid;
>> +
>> + /* Update the list of reserved ASIDs and the ASID bitmap. */
>> + bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
>> +
>> + for_each_possible_cpu(i) {
>> + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
>> + /*
>> + * If this CPU has already been through a
>> + * rollover, but hasn't run another task in
>> + * the meantime, we must preserve its reserved
>> + * ASID, as this is the only trace we have of
>> + * the process it is still running.
>> + */
>> + if (asid == 0)
>> + asid = reserved_asid(info, i);
>> + __set_bit(asid2idx(info, asid), info->map);
>> + reserved_asid(info, i) = asid;
>> + }
>> +
>> + /*
>> + * Queue a TLB invalidation for each CPU to perform on next
>> + * context-switch
>> + */
>> + cpumask_setall(&info->flush_pending);
>> +}
>> +
>> +static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
>> + u64 newasid)
>> +{
>> + int cpu;
>> + bool hit = false;
>> +
>> + /*
>> + * Iterate over the set of reserved ASIDs looking for a match.
>> + * If we find one, then we can update our mm to use newasid
>> + * (i.e. the same ASID in the current generation) but we can't
>> + * exit the loop early, since we need to ensure that all copies
>> + * of the old ASID are updated to reflect the mm. Failure to do
>> + * so could result in us missing the reserved ASID in a future
>> + * generation.
>> + */
>> + for_each_possible_cpu(cpu) {
>> + if (reserved_asid(info, cpu) == asid) {
>> + hit = true;
>> + reserved_asid(info, cpu) = newasid;
>> + }
>> + }
>> +
>> + return hit;
>> +}
>> +
>> +static u64 new_context(struct asid_info *info, atomic64_t *pasid)
>> +{
>> + static u32 cur_idx = 1;
>> + u64 asid = atomic64_read(pasid);
>> + u64 generation = atomic64_read(&info->generation);
>> +
>> + if (asid != 0) {
>> + u64 newasid = generation | (asid & ~ASID_MASK(info));
>> +
>> + /*
>> + * If our current ASID was active during a rollover, we
>> + * can continue to use it and this was just a false alarm.
>> + */
>> + if (check_update_reserved_asid(info, asid, newasid))
>> + return newasid;
>> +
>> + /*
>> + * We had a valid ASID in a previous life, so try to re-use
>> + * it if possible.
>> + */
>> + if (!__test_and_set_bit(asid2idx(info, asid), info->map))
>> + return newasid;
>> + }
>> +
>> + /*
>> + * Allocate a free ASID. If we can't find one, take a note of the
>> + * currently active ASIDs and mark the TLBs as requiring flushes. We
>> + * always count from ASID #2 (index 1), as we use ASID #0 when setting
>> + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
>> + * pairs.
>> + */
>> + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
>> + if (asid != NUM_CTXT_ASIDS(info))
>> + goto set_asid;
>> +
>> + /* We're out of ASIDs, so increment the global generation count */
>> + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
>> + &info->generation);
>> + flush_context(info);
>> +
>> + /* We have more ASIDs than CPUs, so this will always succeed */
>> + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
>> +
>> +set_asid:
>> + __set_bit(asid, info->map);
>> + cur_idx = asid;
>> + return idx2asid(info, asid) | generation;
>> +}
>> +
>> +/*
>> + * Generate a new ASID for the context.
>> + *
>> + * @pasid: Pointer to the current ASID batch allocated. It will be updated
>> + * with the new ASID batch.
>> + * @cpu: current CPU ID. Must have been acquired through get_cpu()
>> + */
>> +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
>> + unsigned int cpu)
>> +{
>> + unsigned long flags;
>> + u64 asid;
>> +
>> + raw_spin_lock_irqsave(&info->lock, flags);
>> + /* Check that our ASID belongs to the current generation. */
>> + asid = atomic64_read(pasid);
>> + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
>> + asid = new_context(info, pasid);
>> + atomic64_set(pasid, asid);
>> + }
>> +
>> + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
>> + info->flush_cpu_ctxt_cb();
>> +
>> + atomic64_set(&active_asid(info, cpu), asid);
>> + raw_spin_unlock_irqrestore(&info->lock, flags);
>> +}
>> +
>> +/*
>> + * Initialize the ASID allocator
>> + *
>> + * @info: Pointer to the asid allocator structure
>> + * @bits: Number of ASIDs available
>> + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
>> + * allocated contiguously for a given context. This value should be a power of
>> + * 2.
>> + */
>> +int asid_allocator_init(struct asid_info *info,
>> + u32 bits, unsigned int asid_per_ctxt,
>> + void (*flush_cpu_ctxt_cb)(void))
>> +{
>> + info->bits = bits;
>> + info->ctxt_shift = ilog2(asid_per_ctxt);
>> + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
>> + /*
>> + * Expect allocation after rollover to fail if we don't have at least
>> + * one more ASID than CPUs. ASID #0 is always reserved.
>> + */
>> + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
>> + atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
>> + info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
>> + sizeof(*info->map), GFP_KERNEL);
>> + if (!info->map)
>> + return -ENOMEM;
>> +
>> + raw_spin_lock_init(&info->lock);
>> +
>> + return 0;
>> +}
>> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
>> index 678a57b77c91..95ee7711a2ef 100644
>> --- a/arch/arm64/mm/context.c
>> +++ b/arch/arm64/mm/context.c
>> @@ -22,47 +22,22 @@
>> #include <linux/slab.h>
>> #include <linux/mm.h>
>>
>> +#include <asm/asid.h>
>> #include <asm/cpufeature.h>
>> #include <asm/mmu_context.h>
>> #include <asm/smp.h>
>> #include <asm/tlbflush.h>
>>
>> -struct asid_info
>> -{
>> - atomic64_t generation;
>> - unsigned long *map;
>> - atomic64_t __percpu *active;
>> - u64 __percpu *reserved;
>> - u32 bits;
>> - raw_spinlock_t lock;
>> - /* Which CPU requires context flush on next call */
>> - cpumask_t flush_pending;
>> - /* Number of ASID allocated by context (shift value) */
>> - unsigned int ctxt_shift;
>> - /* Callback to locally flush the context. */
>> - void (*flush_cpu_ctxt_cb)(void);
>> -} asid_info;
>> -
>> -#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
>> -#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
>> -
>> static DEFINE_PER_CPU(atomic64_t, active_asids);
>> static DEFINE_PER_CPU(u64, reserved_asids);
>>
>> -#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
>> -#define NUM_ASIDS(info) (1UL << ((info)->bits))
>> -
>> -#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
>> -
>> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
>> #define ASID_PER_CONTEXT 2
>> #else
>> #define ASID_PER_CONTEXT 1
>> #endif
>>
>> -#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
>> -#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
>> -#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
>> +struct asid_info asid_info;
>>
>> /* Get the ASIDBits supported by the current CPU */
>> static u32 get_cpu_asid_bits(void)
>> @@ -102,178 +77,6 @@ void verify_cpu_asid_bits(void)
>> }
>> }
>>
>> -static void flush_context(struct asid_info *info)
>> -{
>> - int i;
>> - u64 asid;
>> -
>> - /* Update the list of reserved ASIDs and the ASID bitmap. */
>> - bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
>> -
>> - for_each_possible_cpu(i) {
>> - asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
>> - /*
>> - * If this CPU has already been through a
>> - * rollover, but hasn't run another task in
>> - * the meantime, we must preserve its reserved
>> - * ASID, as this is the only trace we have of
>> - * the process it is still running.
>> - */
>> - if (asid == 0)
>> - asid = reserved_asid(info, i);
>> - __set_bit(asid2idx(info, asid), info->map);
>> - reserved_asid(info, i) = asid;
>> - }
>> -
>> - /*
>> - * Queue a TLB invalidation for each CPU to perform on next
>> - * context-switch
>> - */
>> - cpumask_setall(&info->flush_pending);
>> -}
>> -
>> -static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
>> - u64 newasid)
>> -{
>> - int cpu;
>> - bool hit = false;
>> -
>> - /*
>> - * Iterate over the set of reserved ASIDs looking for a match.
>> - * If we find one, then we can update our mm to use newasid
>> - * (i.e. the same ASID in the current generation) but we can't
>> - * exit the loop early, since we need to ensure that all copies
>> - * of the old ASID are updated to reflect the mm. Failure to do
>> - * so could result in us missing the reserved ASID in a future
>> - * generation.
>> - */
>> - for_each_possible_cpu(cpu) {
>> - if (reserved_asid(info, cpu) == asid) {
>> - hit = true;
>> - reserved_asid(info, cpu) = newasid;
>> - }
>> - }
>> -
>> - return hit;
>> -}
>> -
>> -static u64 new_context(struct asid_info *info, atomic64_t *pasid)
>> -{
>> - static u32 cur_idx = 1;
>> - u64 asid = atomic64_read(pasid);
>> - u64 generation = atomic64_read(&info->generation);
>> -
>> - if (asid != 0) {
>> - u64 newasid = generation | (asid & ~ASID_MASK(info));
>> -
>> - /*
>> - * If our current ASID was active during a rollover, we
>> - * can continue to use it and this was just a false alarm.
>> - */
>> - if (check_update_reserved_asid(info, asid, newasid))
>> - return newasid;
>> -
>> - /*
>> - * We had a valid ASID in a previous life, so try to re-use
>> - * it if possible.
>> - */
>> - if (!__test_and_set_bit(asid2idx(info, asid), info->map))
>> - return newasid;
>> - }
>> -
>> - /*
>> - * Allocate a free ASID. If we can't find one, take a note of the
>> - * currently active ASIDs and mark the TLBs as requiring flushes. We
>> - * always count from ASID #2 (index 1), as we use ASID #0 when setting
>> - * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
>> - * pairs.
>> - */
>> - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
>> - if (asid != NUM_CTXT_ASIDS(info))
>> - goto set_asid;
>> -
>> - /* We're out of ASIDs, so increment the global generation count */
>> - generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
>> - &info->generation);
>> - flush_context(info);
>> -
>> - /* We have more ASIDs than CPUs, so this will always succeed */
>> - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
>> -
>> -set_asid:
>> - __set_bit(asid, info->map);
>> - cur_idx = asid;
>> - return idx2asid(info, asid) | generation;
>> -}
>> -
>> -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
>> - unsigned int cpu);
>> -
>> -/*
>> - * Check the ASID is still valid for the context. If not generate a new ASID.
>> - *
>> - * @pasid: Pointer to the current ASID batch
>> - * @cpu: current CPU ID. Must have been acquired throught get_cpu()
>> - */
>> -static void asid_check_context(struct asid_info *info,
>> - atomic64_t *pasid, unsigned int cpu)
>> -{
>> - u64 asid, old_active_asid;
>> -
>> - asid = atomic64_read(pasid);
>> -
>> - /*
>> - * The memory ordering here is subtle.
>> - * If our active_asid is non-zero and the ASID matches the current
>> - * generation, then we update the active_asid entry with a relaxed
>> - * cmpxchg. Racing with a concurrent rollover means that either:
>> - *
>> - * - We get a zero back from the cmpxchg and end up waiting on the
>> - * lock. Taking the lock synchronises with the rollover and so
>> - * we are forced to see the updated generation.
>> - *
>> - * - We get a valid ASID back from the cmpxchg, which means the
>> - * relaxed xchg in flush_context will treat us as reserved
>> - * because atomic RmWs are totally ordered for a given location.
>> - */
>> - old_active_asid = atomic64_read(&active_asid(info, cpu));
>> - if (old_active_asid &&
>> - !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
>> - atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
>> - old_active_asid, asid))
>> - return;
>> -
>> - asid_new_context(info, pasid, cpu);
>> -}
>> -
>> -/*
>> - * Generate a new ASID for the context.
>> - *
>> - * @pasid: Pointer to the current ASID batch allocated. It will be updated
>> - * with the new ASID batch.
>> - * @cpu: current CPU ID. Must have been acquired through get_cpu()
>> - */
>> -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
>> - unsigned int cpu)
>> -{
>> - unsigned long flags;
>> - u64 asid;
>> -
>> - raw_spin_lock_irqsave(&info->lock, flags);
>> - /* Check that our ASID belongs to the current generation. */
>> - asid = atomic64_read(pasid);
>> - if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
>> - asid = new_context(info, pasid);
>> - atomic64_set(pasid, asid);
>> - }
>> -
>> - if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
>> - info->flush_cpu_ctxt_cb();
>> -
>> - atomic64_set(&active_asid(info, cpu), asid);
>> - raw_spin_unlock_irqrestore(&info->lock, flags);
>> -}
>> -
>> void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
>> {
>> if (system_supports_cnp())
>> @@ -305,38 +108,6 @@ static void asid_flush_cpu_ctxt(void)
>> local_flush_tlb_all();
>> }
>>
>> -/*
>> - * Initialize the ASID allocator
>> - *
>> - * @info: Pointer to the asid allocator structure
>> - * @bits: Number of ASIDs available
>> - * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
>> - * allocated contiguously for a given context. This value should be a power of
>> - * 2.
>> - */
>> -static int asid_allocator_init(struct asid_info *info,
>> - u32 bits, unsigned int asid_per_ctxt,
>> - void (*flush_cpu_ctxt_cb)(void))
>> -{
>> - info->bits = bits;
>> - info->ctxt_shift = ilog2(asid_per_ctxt);
>> - info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
>> - /*
>> - * Expect allocation after rollover to fail if we don't have at least
>> - * one more ASID than CPUs. ASID #0 is always reserved.
>> - */
>> - WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
>> - atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
>> - info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
>> - sizeof(*info->map), GFP_KERNEL);
>> - if (!info->map)
>> - return -ENOMEM;
>> -
>> - raw_spin_lock_init(&info->lock);
>> -
>> - return 0;
>> -}
>> -
>> static int asids_init(void)
>> {
>> u32 bits = get_cpu_asid_bits();
>> @@ -344,7 +115,7 @@ static int asids_init(void)
>> if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
>> asid_flush_cpu_ctxt))
>> panic("Unable to initialize ASID allocator for %lu ASIDs\n",
>> - 1UL << bits);
>> + NUM_ASIDS(&asid_info));
>>
>> asid_info.active = &active_asids;
>> asid_info.reserved = &reserved_asids;
>>
Hi,
On RISC-V, we can only use ASID if there are more ASIDs than CPUs. If there aren't enough ASIDs (or if there is only 1), then ASID feature is disabled and 0 is used everywhere.
Best,
Gary
> -----Original Message-----
> From: Palmer Dabbelt <[email protected]>
> Sent: Wednesday, June 5, 2019 21:42
> To: [email protected]
> Cc: [email protected]; [email protected];
> [email protected]; [email protected]; Gary Guo
> <[email protected]>; Atish Patra <[email protected]>; Christoph Hellwig
> <[email protected]>; Paul Walmsley <[email protected]>;
> [email protected]; [email protected]; Anup Patel
> <[email protected]>; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; Will Deacon <[email protected]>
> Subject: Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a
> separate file
>
> On Wed, 05 Jun 2019 09:56:03 PDT (-0700), [email protected] wrote:
> > Hi,
> >
> > I am CCing RISC-V folks to see if there are an interest to share the code.
> >
> > @RISC-V: I noticed you are discussing about importing a version of ASID
> > allocator in RISC-V. At a first look, the code looks quite similar. Would the
> > library below helps you?
>
> Thanks! I didn't look that closely at the original patches because the
> argument against them was just "we don't have any way to test this".
> Unfortunately, we don't have the constraint that there are more ASIDs than
> CPUs
> in the system. As a result I don't think we can use this ASID allocation
> strategy.
>
> >
> > Cheers,
> >
> > On 21/03/2019 16:36, Julien Grall wrote:
> >> We will want to re-use the ASID allocator in a separate context (e.g
> >> allocating VMID). So move the code in a new file.
> >>
> >> The function asid_check_context has been moved in the header as a static
> >> inline function because we want to avoid add a branch when checking if the
> >> ASID is still valid.
> >>
> >> Signed-off-by: Julien Grall <[email protected]>
> >>
> >> ---
> >>
> >> This code will be used in the virt code for allocating VMID. I am not
> >> entirely sure where to place it. Lib could potentially be a good place but I
> >> am not entirely convinced the algo as it is could be used by other
> >> architecture.
> >>
> >> Looking at x86, it seems that it will not be possible to re-use because
> >> the number of PCID (aka ASID) could be smaller than the number of CPUs.
> >> See commit message 10af6235e0d327d42e1bad974385197817923dc1
> "x86/mm:
> >> Implement PCID based optimization: try to preserve old TLB entries using
> >> PCI".
> >> ---
> >> arch/arm64/include/asm/asid.h | 77 ++++++++++++++
> >> arch/arm64/lib/Makefile | 2 +
> >> arch/arm64/lib/asid.c | 185 +++++++++++++++++++++++++++++++++
> >> arch/arm64/mm/context.c | 235 +-----------------------------------------
> >> 4 files changed, 267 insertions(+), 232 deletions(-)
> >> create mode 100644 arch/arm64/include/asm/asid.h
> >> create mode 100644 arch/arm64/lib/asid.c
> >>
> >> diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
> >> new file mode 100644
> >> index 000000000000..bb62b587f37f
> >> --- /dev/null
> >> +++ b/arch/arm64/include/asm/asid.h
> >> @@ -0,0 +1,77 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +#ifndef __ASM_ASM_ASID_H
> >> +#define __ASM_ASM_ASID_H
> >> +
> >> +#include <linux/atomic.h>
> >> +#include <linux/compiler.h>
> >> +#include <linux/cpumask.h>
> >> +#include <linux/percpu.h>
> >> +#include <linux/spinlock.h>
> >> +
> >> +struct asid_info
> >> +{
> >> + atomic64_t generation;
> >> + unsigned long *map;
> >> + atomic64_t __percpu *active;
> >> + u64 __percpu *reserved;
> >> + u32 bits;
> >> + /* Lock protecting the structure */
> >> + raw_spinlock_t lock;
> >> + /* Which CPU requires context flush on next call */
> >> + cpumask_t flush_pending;
> >> + /* Number of ASID allocated by context (shift value) */
> >> + unsigned int ctxt_shift;
> >> + /* Callback to locally flush the context. */
> >> + void (*flush_cpu_ctxt_cb)(void);
> >> +};
> >> +
> >> +#define NUM_ASIDS(info) (1UL << ((info)->bits))
> >> +#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)-
> >ctxt_shift)
> >> +
> >> +#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
> >> +
> >> +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> >> + unsigned int cpu);
> >> +
> >> +/*
> >> + * Check the ASID is still valid for the context. If not generate a new ASID.
> >> + *
> >> + * @pasid: Pointer to the current ASID batch
> >> + * @cpu: current CPU ID. Must have been acquired throught get_cpu()
> >> + */
> >> +static inline void asid_check_context(struct asid_info *info,
> >> + atomic64_t *pasid, unsigned int cpu)
> >> +{
> >> + u64 asid, old_active_asid;
> >> +
> >> + asid = atomic64_read(pasid);
> >> +
> >> + /*
> >> + * The memory ordering here is subtle.
> >> + * If our active_asid is non-zero and the ASID matches the current
> >> + * generation, then we update the active_asid entry with a relaxed
> >> + * cmpxchg. Racing with a concurrent rollover means that either:
> >> + *
> >> + * - We get a zero back from the cmpxchg and end up waiting on the
> >> + * lock. Taking the lock synchronises with the rollover and so
> >> + * we are forced to see the updated generation.
> >> + *
> >> + * - We get a valid ASID back from the cmpxchg, which means the
> >> + * relaxed xchg in flush_context will treat us as reserved
> >> + * because atomic RmWs are totally ordered for a given location.
> >> + */
> >> + old_active_asid = atomic64_read(&active_asid(info, cpu));
> >> + if (old_active_asid &&
> >> + !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
> >> + atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
> >> + old_active_asid, asid))
> >> + return;
> >> +
> >> + asid_new_context(info, pasid, cpu);
> >> +}
> >> +
> >> +int asid_allocator_init(struct asid_info *info,
> >> + u32 bits, unsigned int asid_per_ctxt,
> >> + void (*flush_cpu_ctxt_cb)(void));
> >> +
> >> +#endif
> >> diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
> >> index 5540a1638baf..720df5ee2aa2 100644
> >> --- a/arch/arm64/lib/Makefile
> >> +++ b/arch/arm64/lib/Makefile
> >> @@ -5,6 +5,8 @@ lib-y := clear_user.o delay.o
> copy_from_user.o \
> >> memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
> >> strchr.o strrchr.o tishift.o
> >>
> >> +lib-y += asid.o
> >> +
> >> ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
> >> obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
> >> CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
> >> diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
> >> new file mode 100644
> >> index 000000000000..72b71bfb32be
> >> --- /dev/null
> >> +++ b/arch/arm64/lib/asid.c
> >> @@ -0,0 +1,185 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Generic ASID allocator.
> >> + *
> >> + * Based on arch/arm/mm/context.c
> >> + *
> >> + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
> >> + * Copyright (C) 2012 ARM Ltd.
> >> + */
> >> +
> >> +#include <linux/slab.h>
> >> +
> >> +#include <asm/asid.h>
> >> +
> >> +#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
> >> +
> >> +#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
> >> +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
> >> +
> >> +#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)-
> >ctxt_shift)
> >> +#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) &
> ~ASID_MASK(info))
> >> +
> >> +static void flush_context(struct asid_info *info)
> >> +{
> >> + int i;
> >> + u64 asid;
> >> +
> >> + /* Update the list of reserved ASIDs and the ASID bitmap. */
> >> + bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
> >> +
> >> + for_each_possible_cpu(i) {
> >> + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
> >> + /*
> >> + * If this CPU has already been through a
> >> + * rollover, but hasn't run another task in
> >> + * the meantime, we must preserve its reserved
> >> + * ASID, as this is the only trace we have of
> >> + * the process it is still running.
> >> + */
> >> + if (asid == 0)
> >> + asid = reserved_asid(info, i);
> >> + __set_bit(asid2idx(info, asid), info->map);
> >> + reserved_asid(info, i) = asid;
> >> + }
> >> +
> >> + /*
> >> + * Queue a TLB invalidation for each CPU to perform on next
> >> + * context-switch
> >> + */
> >> + cpumask_setall(&info->flush_pending);
> >> +}
> >> +
> >> +static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
> >> + u64 newasid)
> >> +{
> >> + int cpu;
> >> + bool hit = false;
> >> +
> >> + /*
> >> + * Iterate over the set of reserved ASIDs looking for a match.
> >> + * If we find one, then we can update our mm to use newasid
> >> + * (i.e. the same ASID in the current generation) but we can't
> >> + * exit the loop early, since we need to ensure that all copies
> >> + * of the old ASID are updated to reflect the mm. Failure to do
> >> + * so could result in us missing the reserved ASID in a future
> >> + * generation.
> >> + */
> >> + for_each_possible_cpu(cpu) {
> >> + if (reserved_asid(info, cpu) == asid) {
> >> + hit = true;
> >> + reserved_asid(info, cpu) = newasid;
> >> + }
> >> + }
> >> +
> >> + return hit;
> >> +}
> >> +
> >> +static u64 new_context(struct asid_info *info, atomic64_t *pasid)
> >> +{
> >> + static u32 cur_idx = 1;
> >> + u64 asid = atomic64_read(pasid);
> >> + u64 generation = atomic64_read(&info->generation);
> >> +
> >> + if (asid != 0) {
> >> + u64 newasid = generation | (asid & ~ASID_MASK(info));
> >> +
> >> + /*
> >> + * If our current ASID was active during a rollover, we
> >> + * can continue to use it and this was just a false alarm.
> >> + */
> >> + if (check_update_reserved_asid(info, asid, newasid))
> >> + return newasid;
> >> +
> >> + /*
> >> + * We had a valid ASID in a previous life, so try to re-use
> >> + * it if possible.
> >> + */
> >> + if (!__test_and_set_bit(asid2idx(info, asid), info->map))
> >> + return newasid;
> >> + }
> >> +
> >> + /*
> >> + * Allocate a free ASID. If we can't find one, take a note of the
> >> + * currently active ASIDs and mark the TLBs as requiring flushes. We
> >> + * always count from ASID #2 (index 1), as we use ASID #0 when setting
> >> + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
> >> + * pairs.
> >> + */
> >> + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
> >> + if (asid != NUM_CTXT_ASIDS(info))
> >> + goto set_asid;
> >> +
> >> + /* We're out of ASIDs, so increment the global generation count */
> >> + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
> >> + &info->generation);
> >> + flush_context(info);
> >> +
> >> + /* We have more ASIDs than CPUs, so this will always succeed */
> >> + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
> >> +
> >> +set_asid:
> >> + __set_bit(asid, info->map);
> >> + cur_idx = asid;
> >> + return idx2asid(info, asid) | generation;
> >> +}
> >> +
> >> +/*
> >> + * Generate a new ASID for the context.
> >> + *
> >> + * @pasid: Pointer to the current ASID batch allocated. It will be updated
> >> + * with the new ASID batch.
> >> + * @cpu: current CPU ID. Must have been acquired through get_cpu()
> >> + */
> >> +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> >> + unsigned int cpu)
> >> +{
> >> + unsigned long flags;
> >> + u64 asid;
> >> +
> >> + raw_spin_lock_irqsave(&info->lock, flags);
> >> + /* Check that our ASID belongs to the current generation. */
> >> + asid = atomic64_read(pasid);
> >> + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
> >> + asid = new_context(info, pasid);
> >> + atomic64_set(pasid, asid);
> >> + }
> >> +
> >> + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
> >> + info->flush_cpu_ctxt_cb();
> >> +
> >> + atomic64_set(&active_asid(info, cpu), asid);
> >> + raw_spin_unlock_irqrestore(&info->lock, flags);
> >> +}
> >> +
> >> +/*
> >> + * Initialize the ASID allocator
> >> + *
> >> + * @info: Pointer to the asid allocator structure
> >> + * @bits: Number of ASIDs available
> >> + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
> >> + * allocated contiguously for a given context. This value should be a power
> of
> >> + * 2.
> >> + */
> >> +int asid_allocator_init(struct asid_info *info,
> >> + u32 bits, unsigned int asid_per_ctxt,
> >> + void (*flush_cpu_ctxt_cb)(void))
> >> +{
> >> + info->bits = bits;
> >> + info->ctxt_shift = ilog2(asid_per_ctxt);
> >> + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
> >> + /*
> >> + * Expect allocation after rollover to fail if we don't have at least
> >> + * one more ASID than CPUs. ASID #0 is always reserved.
> >> + */
> >> + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
> >> + atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
> >> + info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
> >> + sizeof(*info->map), GFP_KERNEL);
> >> + if (!info->map)
> >> + return -ENOMEM;
> >> +
> >> + raw_spin_lock_init(&info->lock);
> >> +
> >> + return 0;
> >> +}
> >> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> >> index 678a57b77c91..95ee7711a2ef 100644
> >> --- a/arch/arm64/mm/context.c
> >> +++ b/arch/arm64/mm/context.c
> >> @@ -22,47 +22,22 @@
> >> #include <linux/slab.h>
> >> #include <linux/mm.h>
> >>
> >> +#include <asm/asid.h>
> >> #include <asm/cpufeature.h>
> >> #include <asm/mmu_context.h>
> >> #include <asm/smp.h>
> >> #include <asm/tlbflush.h>
> >>
> >> -struct asid_info
> >> -{
> >> - atomic64_t generation;
> >> - unsigned long *map;
> >> - atomic64_t __percpu *active;
> >> - u64 __percpu *reserved;
> >> - u32 bits;
> >> - raw_spinlock_t lock;
> >> - /* Which CPU requires context flush on next call */
> >> - cpumask_t flush_pending;
> >> - /* Number of ASID allocated by context (shift value) */
> >> - unsigned int ctxt_shift;
> >> - /* Callback to locally flush the context. */
> >> - void (*flush_cpu_ctxt_cb)(void);
> >> -} asid_info;
> >> -
> >> -#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
> >> -#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
> >> -
> >> static DEFINE_PER_CPU(atomic64_t, active_asids);
> >> static DEFINE_PER_CPU(u64, reserved_asids);
> >>
> >> -#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
> >> -#define NUM_ASIDS(info) (1UL << ((info)->bits))
> >> -
> >> -#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
> >> -
> >> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> >> #define ASID_PER_CONTEXT 2
> >> #else
> >> #define ASID_PER_CONTEXT 1
> >> #endif
> >>
> >> -#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)-
> >ctxt_shift)
> >> -#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)-
> >ctxt_shift)
> >> -#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) &
> ~ASID_MASK(info))
> >> +struct asid_info asid_info;
> >>
> >> /* Get the ASIDBits supported by the current CPU */
> >> static u32 get_cpu_asid_bits(void)
> >> @@ -102,178 +77,6 @@ void verify_cpu_asid_bits(void)
> >> }
> >> }
> >>
> >> -static void flush_context(struct asid_info *info)
> >> -{
> >> - int i;
> >> - u64 asid;
> >> -
> >> - /* Update the list of reserved ASIDs and the ASID bitmap. */
> >> - bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
> >> -
> >> - for_each_possible_cpu(i) {
> >> - asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
> >> - /*
> >> - * If this CPU has already been through a
> >> - * rollover, but hasn't run another task in
> >> - * the meantime, we must preserve its reserved
> >> - * ASID, as this is the only trace we have of
> >> - * the process it is still running.
> >> - */
> >> - if (asid == 0)
> >> - asid = reserved_asid(info, i);
> >> - __set_bit(asid2idx(info, asid), info->map);
> >> - reserved_asid(info, i) = asid;
> >> - }
> >> -
> >> - /*
> >> - * Queue a TLB invalidation for each CPU to perform on next
> >> - * context-switch
> >> - */
> >> - cpumask_setall(&info->flush_pending);
> >> -}
> >> -
> >> -static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
> >> - u64 newasid)
> >> -{
> >> - int cpu;
> >> - bool hit = false;
> >> -
> >> - /*
> >> - * Iterate over the set of reserved ASIDs looking for a match.
> >> - * If we find one, then we can update our mm to use newasid
> >> - * (i.e. the same ASID in the current generation) but we can't
> >> - * exit the loop early, since we need to ensure that all copies
> >> - * of the old ASID are updated to reflect the mm. Failure to do
> >> - * so could result in us missing the reserved ASID in a future
> >> - * generation.
> >> - */
> >> - for_each_possible_cpu(cpu) {
> >> - if (reserved_asid(info, cpu) == asid) {
> >> - hit = true;
> >> - reserved_asid(info, cpu) = newasid;
> >> - }
> >> - }
> >> -
> >> - return hit;
> >> -}
> >> -
> >> -static u64 new_context(struct asid_info *info, atomic64_t *pasid)
> >> -{
> >> - static u32 cur_idx = 1;
> >> - u64 asid = atomic64_read(pasid);
> >> - u64 generation = atomic64_read(&info->generation);
> >> -
> >> - if (asid != 0) {
> >> - u64 newasid = generation | (asid & ~ASID_MASK(info));
> >> -
> >> - /*
> >> - * If our current ASID was active during a rollover, we
> >> - * can continue to use it and this was just a false alarm.
> >> - */
> >> - if (check_update_reserved_asid(info, asid, newasid))
> >> - return newasid;
> >> -
> >> - /*
> >> - * We had a valid ASID in a previous life, so try to re-use
> >> - * it if possible.
> >> - */
> >> - if (!__test_and_set_bit(asid2idx(info, asid), info->map))
> >> - return newasid;
> >> - }
> >> -
> >> - /*
> >> - * Allocate a free ASID. If we can't find one, take a note of the
> >> - * currently active ASIDs and mark the TLBs as requiring flushes. We
> >> - * always count from ASID #2 (index 1), as we use ASID #0 when setting
> >> - * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
> >> - * pairs.
> >> - */
> >> - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
> >> - if (asid != NUM_CTXT_ASIDS(info))
> >> - goto set_asid;
> >> -
> >> - /* We're out of ASIDs, so increment the global generation count */
> >> - generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
> >> - &info->generation);
> >> - flush_context(info);
> >> -
> >> - /* We have more ASIDs than CPUs, so this will always succeed */
> >> - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
> >> -
> >> -set_asid:
> >> - __set_bit(asid, info->map);
> >> - cur_idx = asid;
> >> - return idx2asid(info, asid) | generation;
> >> -}
> >> -
> >> -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> >> - unsigned int cpu);
> >> -
> >> -/*
> >> - * Check the ASID is still valid for the context. If not generate a new ASID.
> >> - *
> >> - * @pasid: Pointer to the current ASID batch
> >> - * @cpu: current CPU ID. Must have been acquired throught get_cpu()
> >> - */
> >> -static void asid_check_context(struct asid_info *info,
> >> - atomic64_t *pasid, unsigned int cpu)
> >> -{
> >> - u64 asid, old_active_asid;
> >> -
> >> - asid = atomic64_read(pasid);
> >> -
> >> - /*
> >> - * The memory ordering here is subtle.
> >> - * If our active_asid is non-zero and the ASID matches the current
> >> - * generation, then we update the active_asid entry with a relaxed
> >> - * cmpxchg. Racing with a concurrent rollover means that either:
> >> - *
> >> - * - We get a zero back from the cmpxchg and end up waiting on the
> >> - * lock. Taking the lock synchronises with the rollover and so
> >> - * we are forced to see the updated generation.
> >> - *
> >> - * - We get a valid ASID back from the cmpxchg, which means the
> >> - * relaxed xchg in flush_context will treat us as reserved
> >> - * because atomic RmWs are totally ordered for a given location.
> >> - */
> >> - old_active_asid = atomic64_read(&active_asid(info, cpu));
> >> - if (old_active_asid &&
> >> - !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
> >> - atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
> >> - old_active_asid, asid))
> >> - return;
> >> -
> >> - asid_new_context(info, pasid, cpu);
> >> -}
> >> -
> >> -/*
> >> - * Generate a new ASID for the context.
> >> - *
> >> - * @pasid: Pointer to the current ASID batch allocated. It will be updated
> >> - * with the new ASID batch.
> >> - * @cpu: current CPU ID. Must have been acquired through get_cpu()
> >> - */
> >> -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> >> - unsigned int cpu)
> >> -{
> >> - unsigned long flags;
> >> - u64 asid;
> >> -
> >> - raw_spin_lock_irqsave(&info->lock, flags);
> >> - /* Check that our ASID belongs to the current generation. */
> >> - asid = atomic64_read(pasid);
> >> - if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
> >> - asid = new_context(info, pasid);
> >> - atomic64_set(pasid, asid);
> >> - }
> >> -
> >> - if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
> >> - info->flush_cpu_ctxt_cb();
> >> -
> >> - atomic64_set(&active_asid(info, cpu), asid);
> >> - raw_spin_unlock_irqrestore(&info->lock, flags);
> >> -}
> >> -
> >> void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
> >> {
> >> if (system_supports_cnp())
> >> @@ -305,38 +108,6 @@ static void asid_flush_cpu_ctxt(void)
> >> local_flush_tlb_all();
> >> }
> >>
> >> -/*
> >> - * Initialize the ASID allocator
> >> - *
> >> - * @info: Pointer to the asid allocator structure
> >> - * @bits: Number of ASIDs available
> >> - * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
> >> - * allocated contiguously for a given context. This value should be a power
> of
> >> - * 2.
> >> - */
> >> -static int asid_allocator_init(struct asid_info *info,
> >> - u32 bits, unsigned int asid_per_ctxt,
> >> - void (*flush_cpu_ctxt_cb)(void))
> >> -{
> >> - info->bits = bits;
> >> - info->ctxt_shift = ilog2(asid_per_ctxt);
> >> - info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
> >> - /*
> >> - * Expect allocation after rollover to fail if we don't have at least
> >> - * one more ASID than CPUs. ASID #0 is always reserved.
> >> - */
> >> - WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
> >> - atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
> >> - info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
> >> - sizeof(*info->map), GFP_KERNEL);
> >> - if (!info->map)
> >> - return -ENOMEM;
> >> -
> >> - raw_spin_lock_init(&info->lock);
> >> -
> >> - return 0;
> >> -}
> >> -
> >> static int asids_init(void)
> >> {
> >> u32 bits = get_cpu_asid_bits();
> >> @@ -344,7 +115,7 @@ static int asids_init(void)
> >> if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
> >> asid_flush_cpu_ctxt))
> >> panic("Unable to initialize ASID allocator for %lu ASIDs\n",
> >> - 1UL << bits);
> >> + NUM_ASIDS(&asid_info));
> >>
> >> asid_info.active = &active_asids;
> >> asid_info.reserved = &reserved_asids;
> >>
Hi Julien,
You forgot CCing C-SKY folks :P
Move arm asid allocator code in a generic one is a agood idea, I've
made a patchset for C-SKY and test is on processing, See:
https://lore.kernel.org/linux-csky/[email protected]/
If you plan to seperate it into generic one, I could co-work with you.
Or I'll bring asid code into csky subsystem first and you can cleanup
them later.
Best Regards
Guo Ren
ML: [email protected]
On Thu, Jun 6, 2019 at 12:56 AM Julien Grall <[email protected]> wrote:
>
> Hi,
>
> I am CCing RISC-V folks to see if there are an interest to share the code.
>
> @RISC-V: I noticed you are discussing about importing a version of ASID
> allocator in RISC-V. At a first look, the code looks quite similar. Would the
> library below helps you?
>
> Cheers,
>
> On 21/03/2019 16:36, Julien Grall wrote:
> > We will want to re-use the ASID allocator in a separate context (e.g
> > allocating VMID). So move the code in a new file.
> >
> > The function asid_check_context has been moved in the header as a static
> > inline function because we want to avoid add a branch when checking if the
> > ASID is still valid.
> >
> > Signed-off-by: Julien Grall <[email protected]>
> >
> > ---
> >
> > This code will be used in the virt code for allocating VMID. I am not
> > entirely sure where to place it. Lib could potentially be a good place but I
> > am not entirely convinced the algo as it is could be used by other
> > architecture.
> >
> > Looking at x86, it seems that it will not be possible to re-use because
> > the number of PCID (aka ASID) could be smaller than the number of CPUs.
> > See commit message 10af6235e0d327d42e1bad974385197817923dc1 "x86/mm:
> > Implement PCID based optimization: try to preserve old TLB entries using
> > PCI".
> > ---
> > arch/arm64/include/asm/asid.h | 77 ++++++++++++++
> > arch/arm64/lib/Makefile | 2 +
> > arch/arm64/lib/asid.c | 185 +++++++++++++++++++++++++++++++++
> > arch/arm64/mm/context.c | 235 +-----------------------------------------
> > 4 files changed, 267 insertions(+), 232 deletions(-)
> > create mode 100644 arch/arm64/include/asm/asid.h
> > create mode 100644 arch/arm64/lib/asid.c
> >
> > diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
> > new file mode 100644
> > index 000000000000..bb62b587f37f
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/asid.h
> > @@ -0,0 +1,77 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __ASM_ASM_ASID_H
> > +#define __ASM_ASM_ASID_H
> > +
> > +#include <linux/atomic.h>
> > +#include <linux/compiler.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/percpu.h>
> > +#include <linux/spinlock.h>
> > +
> > +struct asid_info
> > +{
> > + atomic64_t generation;
> > + unsigned long *map;
> > + atomic64_t __percpu *active;
> > + u64 __percpu *reserved;
> > + u32 bits;
> > + /* Lock protecting the structure */
> > + raw_spinlock_t lock;
> > + /* Which CPU requires context flush on next call */
> > + cpumask_t flush_pending;
> > + /* Number of ASID allocated by context (shift value) */
> > + unsigned int ctxt_shift;
> > + /* Callback to locally flush the context. */
> > + void (*flush_cpu_ctxt_cb)(void);
> > +};
> > +
> > +#define NUM_ASIDS(info) (1UL << ((info)->bits))
> > +#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
> > +
> > +#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
> > +
> > +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> > + unsigned int cpu);
> > +
> > +/*
> > + * Check the ASID is still valid for the context. If not generate a new ASID.
> > + *
> > + * @pasid: Pointer to the current ASID batch
> > + * @cpu: current CPU ID. Must have been acquired throught get_cpu()
> > + */
> > +static inline void asid_check_context(struct asid_info *info,
> > + atomic64_t *pasid, unsigned int cpu)
> > +{
> > + u64 asid, old_active_asid;
> > +
> > + asid = atomic64_read(pasid);
> > +
> > + /*
> > + * The memory ordering here is subtle.
> > + * If our active_asid is non-zero and the ASID matches the current
> > + * generation, then we update the active_asid entry with a relaxed
> > + * cmpxchg. Racing with a concurrent rollover means that either:
> > + *
> > + * - We get a zero back from the cmpxchg and end up waiting on the
> > + * lock. Taking the lock synchronises with the rollover and so
> > + * we are forced to see the updated generation.
> > + *
> > + * - We get a valid ASID back from the cmpxchg, which means the
> > + * relaxed xchg in flush_context will treat us as reserved
> > + * because atomic RmWs are totally ordered for a given location.
> > + */
> > + old_active_asid = atomic64_read(&active_asid(info, cpu));
> > + if (old_active_asid &&
> > + !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
> > + atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
> > + old_active_asid, asid))
> > + return;
> > +
> > + asid_new_context(info, pasid, cpu);
> > +}
> > +
> > +int asid_allocator_init(struct asid_info *info,
> > + u32 bits, unsigned int asid_per_ctxt,
> > + void (*flush_cpu_ctxt_cb)(void));
> > +
> > +#endif
> > diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
> > index 5540a1638baf..720df5ee2aa2 100644
> > --- a/arch/arm64/lib/Makefile
> > +++ b/arch/arm64/lib/Makefile
> > @@ -5,6 +5,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \
> > memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
> > strchr.o strrchr.o tishift.o
> >
> > +lib-y += asid.o
> > +
> > ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
> > obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
> > CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
> > diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
> > new file mode 100644
> > index 000000000000..72b71bfb32be
> > --- /dev/null
> > +++ b/arch/arm64/lib/asid.c
> > @@ -0,0 +1,185 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Generic ASID allocator.
> > + *
> > + * Based on arch/arm/mm/context.c
> > + *
> > + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
> > + * Copyright (C) 2012 ARM Ltd.
> > + */
> > +
> > +#include <linux/slab.h>
> > +
> > +#include <asm/asid.h>
> > +
> > +#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
> > +
> > +#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
> > +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))
> > +
> > +#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
> > +#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
> > +
> > +static void flush_context(struct asid_info *info)
> > +{
> > + int i;
> > + u64 asid;
> > +
> > + /* Update the list of reserved ASIDs and the ASID bitmap. */
> > + bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
> > +
> > + for_each_possible_cpu(i) {
> > + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
> > + /*
> > + * If this CPU has already been through a
> > + * rollover, but hasn't run another task in
> > + * the meantime, we must preserve its reserved
> > + * ASID, as this is the only trace we have of
> > + * the process it is still running.
> > + */
> > + if (asid == 0)
> > + asid = reserved_asid(info, i);
> > + __set_bit(asid2idx(info, asid), info->map);
> > + reserved_asid(info, i) = asid;
> > + }
> > +
> > + /*
> > + * Queue a TLB invalidation for each CPU to perform on next
> > + * context-switch
> > + */
> > + cpumask_setall(&info->flush_pending);
> > +}
> > +
> > +static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
> > + u64 newasid)
> > +{
> > + int cpu;
> > + bool hit = false;
> > +
> > + /*
> > + * Iterate over the set of reserved ASIDs looking for a match.
> > + * If we find one, then we can update our mm to use newasid
> > + * (i.e. the same ASID in the current generation) but we can't
> > + * exit the loop early, since we need to ensure that all copies
> > + * of the old ASID are updated to reflect the mm. Failure to do
> > + * so could result in us missing the reserved ASID in a future
> > + * generation.
> > + */
> > + for_each_possible_cpu(cpu) {
> > + if (reserved_asid(info, cpu) == asid) {
> > + hit = true;
> > + reserved_asid(info, cpu) = newasid;
> > + }
> > + }
> > +
> > + return hit;
> > +}
> > +
> > +static u64 new_context(struct asid_info *info, atomic64_t *pasid)
> > +{
> > + static u32 cur_idx = 1;
> > + u64 asid = atomic64_read(pasid);
> > + u64 generation = atomic64_read(&info->generation);
> > +
> > + if (asid != 0) {
> > + u64 newasid = generation | (asid & ~ASID_MASK(info));
> > +
> > + /*
> > + * If our current ASID was active during a rollover, we
> > + * can continue to use it and this was just a false alarm.
> > + */
> > + if (check_update_reserved_asid(info, asid, newasid))
> > + return newasid;
> > +
> > + /*
> > + * We had a valid ASID in a previous life, so try to re-use
> > + * it if possible.
> > + */
> > + if (!__test_and_set_bit(asid2idx(info, asid), info->map))
> > + return newasid;
> > + }
> > +
> > + /*
> > + * Allocate a free ASID. If we can't find one, take a note of the
> > + * currently active ASIDs and mark the TLBs as requiring flushes. We
> > + * always count from ASID #2 (index 1), as we use ASID #0 when setting
> > + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
> > + * pairs.
> > + */
> > + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
> > + if (asid != NUM_CTXT_ASIDS(info))
> > + goto set_asid;
> > +
> > + /* We're out of ASIDs, so increment the global generation count */
> > + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
> > + &info->generation);
> > + flush_context(info);
> > +
> > + /* We have more ASIDs than CPUs, so this will always succeed */
> > + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
> > +
> > +set_asid:
> > + __set_bit(asid, info->map);
> > + cur_idx = asid;
> > + return idx2asid(info, asid) | generation;
> > +}
> > +
> > +/*
> > + * Generate a new ASID for the context.
> > + *
> > + * @pasid: Pointer to the current ASID batch allocated. It will be updated
> > + * with the new ASID batch.
> > + * @cpu: current CPU ID. Must have been acquired through get_cpu()
> > + */
> > +void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> > + unsigned int cpu)
> > +{
> > + unsigned long flags;
> > + u64 asid;
> > +
> > + raw_spin_lock_irqsave(&info->lock, flags);
> > + /* Check that our ASID belongs to the current generation. */
> > + asid = atomic64_read(pasid);
> > + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
> > + asid = new_context(info, pasid);
> > + atomic64_set(pasid, asid);
> > + }
> > +
> > + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
> > + info->flush_cpu_ctxt_cb();
> > +
> > + atomic64_set(&active_asid(info, cpu), asid);
> > + raw_spin_unlock_irqrestore(&info->lock, flags);
> > +}
> > +
> > +/*
> > + * Initialize the ASID allocator
> > + *
> > + * @info: Pointer to the asid allocator structure
> > + * @bits: Number of ASIDs available
> > + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
> > + * allocated contiguously for a given context. This value should be a power of
> > + * 2.
> > + */
> > +int asid_allocator_init(struct asid_info *info,
> > + u32 bits, unsigned int asid_per_ctxt,
> > + void (*flush_cpu_ctxt_cb)(void))
> > +{
> > + info->bits = bits;
> > + info->ctxt_shift = ilog2(asid_per_ctxt);
> > + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
> > + /*
> > + * Expect allocation after rollover to fail if we don't have at least
> > + * one more ASID than CPUs. ASID #0 is always reserved.
> > + */
> > + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
> > + atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
> > + info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
> > + sizeof(*info->map), GFP_KERNEL);
> > + if (!info->map)
> > + return -ENOMEM;
> > +
> > + raw_spin_lock_init(&info->lock);
> > +
> > + return 0;
> > +}
> > diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> > index 678a57b77c91..95ee7711a2ef 100644
> > --- a/arch/arm64/mm/context.c
> > +++ b/arch/arm64/mm/context.c
> > @@ -22,47 +22,22 @@
> > #include <linux/slab.h>
> > #include <linux/mm.h>
> >
> > +#include <asm/asid.h>
> > #include <asm/cpufeature.h>
> > #include <asm/mmu_context.h>
> > #include <asm/smp.h>
> > #include <asm/tlbflush.h>
> >
> > -struct asid_info
> > -{
> > - atomic64_t generation;
> > - unsigned long *map;
> > - atomic64_t __percpu *active;
> > - u64 __percpu *reserved;
> > - u32 bits;
> > - raw_spinlock_t lock;
> > - /* Which CPU requires context flush on next call */
> > - cpumask_t flush_pending;
> > - /* Number of ASID allocated by context (shift value) */
> > - unsigned int ctxt_shift;
> > - /* Callback to locally flush the context. */
> > - void (*flush_cpu_ctxt_cb)(void);
> > -} asid_info;
> > -
> > -#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
> > -#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
> > -
> > static DEFINE_PER_CPU(atomic64_t, active_asids);
> > static DEFINE_PER_CPU(u64, reserved_asids);
> >
> > -#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0))
> > -#define NUM_ASIDS(info) (1UL << ((info)->bits))
> > -
> > -#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)
> > -
> > #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> > #define ASID_PER_CONTEXT 2
> > #else
> > #define ASID_PER_CONTEXT 1
> > #endif
> >
> > -#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift)
> > -#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
> > -#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
> > +struct asid_info asid_info;
> >
> > /* Get the ASIDBits supported by the current CPU */
> > static u32 get_cpu_asid_bits(void)
> > @@ -102,178 +77,6 @@ void verify_cpu_asid_bits(void)
> > }
> > }
> >
> > -static void flush_context(struct asid_info *info)
> > -{
> > - int i;
> > - u64 asid;
> > -
> > - /* Update the list of reserved ASIDs and the ASID bitmap. */
> > - bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
> > -
> > - for_each_possible_cpu(i) {
> > - asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
> > - /*
> > - * If this CPU has already been through a
> > - * rollover, but hasn't run another task in
> > - * the meantime, we must preserve its reserved
> > - * ASID, as this is the only trace we have of
> > - * the process it is still running.
> > - */
> > - if (asid == 0)
> > - asid = reserved_asid(info, i);
> > - __set_bit(asid2idx(info, asid), info->map);
> > - reserved_asid(info, i) = asid;
> > - }
> > -
> > - /*
> > - * Queue a TLB invalidation for each CPU to perform on next
> > - * context-switch
> > - */
> > - cpumask_setall(&info->flush_pending);
> > -}
> > -
> > -static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
> > - u64 newasid)
> > -{
> > - int cpu;
> > - bool hit = false;
> > -
> > - /*
> > - * Iterate over the set of reserved ASIDs looking for a match.
> > - * If we find one, then we can update our mm to use newasid
> > - * (i.e. the same ASID in the current generation) but we can't
> > - * exit the loop early, since we need to ensure that all copies
> > - * of the old ASID are updated to reflect the mm. Failure to do
> > - * so could result in us missing the reserved ASID in a future
> > - * generation.
> > - */
> > - for_each_possible_cpu(cpu) {
> > - if (reserved_asid(info, cpu) == asid) {
> > - hit = true;
> > - reserved_asid(info, cpu) = newasid;
> > - }
> > - }
> > -
> > - return hit;
> > -}
> > -
> > -static u64 new_context(struct asid_info *info, atomic64_t *pasid)
> > -{
> > - static u32 cur_idx = 1;
> > - u64 asid = atomic64_read(pasid);
> > - u64 generation = atomic64_read(&info->generation);
> > -
> > - if (asid != 0) {
> > - u64 newasid = generation | (asid & ~ASID_MASK(info));
> > -
> > - /*
> > - * If our current ASID was active during a rollover, we
> > - * can continue to use it and this was just a false alarm.
> > - */
> > - if (check_update_reserved_asid(info, asid, newasid))
> > - return newasid;
> > -
> > - /*
> > - * We had a valid ASID in a previous life, so try to re-use
> > - * it if possible.
> > - */
> > - if (!__test_and_set_bit(asid2idx(info, asid), info->map))
> > - return newasid;
> > - }
> > -
> > - /*
> > - * Allocate a free ASID. If we can't find one, take a note of the
> > - * currently active ASIDs and mark the TLBs as requiring flushes. We
> > - * always count from ASID #2 (index 1), as we use ASID #0 when setting
> > - * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
> > - * pairs.
> > - */
> > - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
> > - if (asid != NUM_CTXT_ASIDS(info))
> > - goto set_asid;
> > -
> > - /* We're out of ASIDs, so increment the global generation count */
> > - generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info),
> > - &info->generation);
> > - flush_context(info);
> > -
> > - /* We have more ASIDs than CPUs, so this will always succeed */
> > - asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
> > -
> > -set_asid:
> > - __set_bit(asid, info->map);
> > - cur_idx = asid;
> > - return idx2asid(info, asid) | generation;
> > -}
> > -
> > -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> > - unsigned int cpu);
> > -
> > -/*
> > - * Check the ASID is still valid for the context. If not generate a new ASID.
> > - *
> > - * @pasid: Pointer to the current ASID batch
> > - * @cpu: current CPU ID. Must have been acquired throught get_cpu()
> > - */
> > -static void asid_check_context(struct asid_info *info,
> > - atomic64_t *pasid, unsigned int cpu)
> > -{
> > - u64 asid, old_active_asid;
> > -
> > - asid = atomic64_read(pasid);
> > -
> > - /*
> > - * The memory ordering here is subtle.
> > - * If our active_asid is non-zero and the ASID matches the current
> > - * generation, then we update the active_asid entry with a relaxed
> > - * cmpxchg. Racing with a concurrent rollover means that either:
> > - *
> > - * - We get a zero back from the cmpxchg and end up waiting on the
> > - * lock. Taking the lock synchronises with the rollover and so
> > - * we are forced to see the updated generation.
> > - *
> > - * - We get a valid ASID back from the cmpxchg, which means the
> > - * relaxed xchg in flush_context will treat us as reserved
> > - * because atomic RmWs are totally ordered for a given location.
> > - */
> > - old_active_asid = atomic64_read(&active_asid(info, cpu));
> > - if (old_active_asid &&
> > - !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
> > - atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
> > - old_active_asid, asid))
> > - return;
> > -
> > - asid_new_context(info, pasid, cpu);
> > -}
> > -
> > -/*
> > - * Generate a new ASID for the context.
> > - *
> > - * @pasid: Pointer to the current ASID batch allocated. It will be updated
> > - * with the new ASID batch.
> > - * @cpu: current CPU ID. Must have been acquired through get_cpu()
> > - */
> > -static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
> > - unsigned int cpu)
> > -{
> > - unsigned long flags;
> > - u64 asid;
> > -
> > - raw_spin_lock_irqsave(&info->lock, flags);
> > - /* Check that our ASID belongs to the current generation. */
> > - asid = atomic64_read(pasid);
> > - if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
> > - asid = new_context(info, pasid);
> > - atomic64_set(pasid, asid);
> > - }
> > -
> > - if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
> > - info->flush_cpu_ctxt_cb();
> > -
> > - atomic64_set(&active_asid(info, cpu), asid);
> > - raw_spin_unlock_irqrestore(&info->lock, flags);
> > -}
> > -
> > void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
> > {
> > if (system_supports_cnp())
> > @@ -305,38 +108,6 @@ static void asid_flush_cpu_ctxt(void)
> > local_flush_tlb_all();
> > }
> >
> > -/*
> > - * Initialize the ASID allocator
> > - *
> > - * @info: Pointer to the asid allocator structure
> > - * @bits: Number of ASIDs available
> > - * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
> > - * allocated contiguously for a given context. This value should be a power of
> > - * 2.
> > - */
> > -static int asid_allocator_init(struct asid_info *info,
> > - u32 bits, unsigned int asid_per_ctxt,
> > - void (*flush_cpu_ctxt_cb)(void))
> > -{
> > - info->bits = bits;
> > - info->ctxt_shift = ilog2(asid_per_ctxt);
> > - info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
> > - /*
> > - * Expect allocation after rollover to fail if we don't have at least
> > - * one more ASID than CPUs. ASID #0 is always reserved.
> > - */
> > - WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
> > - atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
> > - info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
> > - sizeof(*info->map), GFP_KERNEL);
> > - if (!info->map)
> > - return -ENOMEM;
> > -
> > - raw_spin_lock_init(&info->lock);
> > -
> > - return 0;
> > -}
> > -
> > static int asids_init(void)
> > {
> > u32 bits = get_cpu_asid_bits();
> > @@ -344,7 +115,7 @@ static int asids_init(void)
> > if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
> > asid_flush_cpu_ctxt))
> > panic("Unable to initialize ASID allocator for %lu ASIDs\n",
> > - 1UL << bits);
> > + NUM_ASIDS(&asid_info));
> >
> > asid_info.active = &active_asids;
> > asid_info.reserved = &reserved_asids;
> >
>
> --
> Julien Grall
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv
On 6/19/19 9:07 AM, Guo Ren wrote:
> Hi Julien,
Hi,
>
> You forgot CCing C-SKY folks :P
I wasn't aware you could be interested :).
>
> Move arm asid allocator code in a generic one is a agood idea, I've
> made a patchset for C-SKY and test is on processing, See:
> https://lore.kernel.org/linux-csky/[email protected]/
>
> If you plan to seperate it into generic one, I could co-work with you.
Was the ASID allocator work out of box on C-Sky? If so, I can easily
move the code in a generic place (maybe lib/asid.c).
Cheers,
--
Julien Grall
On Wed, Jun 19, 2019 at 09:54:21AM +0100, Julien Grall wrote:
> On 6/19/19 9:07 AM, Guo Ren wrote:
> > You forgot CCing C-SKY folks :P
>
> I wasn't aware you could be interested :).
>
> > Move arm asid allocator code in a generic one is a agood idea, I've
> > made a patchset for C-SKY and test is on processing, See:
> > https://lore.kernel.org/linux-csky/[email protected]/
> >
> > If you plan to seperate it into generic one, I could co-work with you.
>
> Was the ASID allocator work out of box on C-Sky? If so, I can easily move
> the code in a generic place (maybe lib/asid.c).
This is one place where I'd actually prefer not to go down the route of
making the code generic. Context-switching and low-level TLB management
is deeply architecture-specific and I worry that by trying to make this
code common, we run the real risk of introducing subtle bugs on some
architecture every time it is changed. Furthermore, the algorithm we use
on arm64 is designed to scale to large systems using DVM and may well be
too complex and/or sub-optimal for architectures with different system
topologies or TLB invalidation mechanisms.
It's not a lot of code, so I don't see that it's a big deal to keep it
under arch/arm64.
Will
On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
>
>
>
> On 6/19/19 9:07 AM, Guo Ren wrote:
> > Hi Julien,
>
> Hi,
>
> >
> > You forgot CCing C-SKY folks :P
>
> I wasn't aware you could be interested :).
>
> >
> > Move arm asid allocator code in a generic one is a agood idea, I've
> > made a patchset for C-SKY and test is on processing, See:
> > https://lore.kernel.org/linux-csky/[email protected]/
> >
> > If you plan to seperate it into generic one, I could co-work with you.
>
> Was the ASID allocator work out of box on C-Sky?
Almost done, but one question:
arm64 remove the code in switch_mm:
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
Why? Although arm64 cache operations could affect all harts with CTC
method of interconnect, I think we should
keep these code for primitive integrity in linux. Because cpu_bitmap
is in mm_struct instead of mm->context.
In current csky's patches I've also removed the codes the same as
arm64, but I'll add it back at next version.
> If so, I can easily move the code in a generic place (maybe lib/asid.c).
I think it's OK.
Best Regards
Guo Ren
On Wed, Jun 19, 2019 at 5:12 PM Will Deacon <[email protected]> wrote:
>
> On Wed, Jun 19, 2019 at 09:54:21AM +0100, Julien Grall wrote:
> > On 6/19/19 9:07 AM, Guo Ren wrote:
> > > You forgot CCing C-SKY folks :P
> >
> > I wasn't aware you could be interested :).
> >
> > > Move arm asid allocator code in a generic one is a agood idea, I've
> > > made a patchset for C-SKY and test is on processing, See:
> > > https://lore.kernel.org/linux-csky/[email protected]/
> > >
> > > If you plan to seperate it into generic one, I could co-work with you.
> >
> > Was the ASID allocator work out of box on C-Sky? If so, I can easily move
> > the code in a generic place (maybe lib/asid.c).
>
> This is one place where I'd actually prefer not to go down the route of
> making the code generic. Context-switching and low-level TLB management
> is deeply architecture-specific and I worry that by trying to make this
> code common, we run the real risk of introducing subtle bugs on some
> architecture every time it is changed.
"Add generic asid code" and "move arm's into generic" are two things.
We could do
first and let architecture's maintainer to choose.
> Furthermore, the algorithm we use
> on arm64 is designed to scale to large systems using DVM and may well be
> too complex and/or sub-optimal for architectures with different system
> topologies or TLB invalidation mechanisms.
It's just a asid algorithm not very complex and there is a callback
for architecture to define their
own local hart tlb flush. Seems it has nothing with DVM or tlb
broadcast mechanism.
>
> It's not a lot of code, so I don't see that it's a big deal to keep it
> under arch/arm64.
Yes, I think that's ok for arm64.
Hi Arnd,
What do you think about adding generic asid code for arch selection?
Best Regards
Guo Ren
On Wed, Jun 19, 2019 at 08:18:04PM +0800, Guo Ren wrote:
> On Wed, Jun 19, 2019 at 5:12 PM Will Deacon <[email protected]> wrote:
> > This is one place where I'd actually prefer not to go down the route of
> > making the code generic. Context-switching and low-level TLB management
> > is deeply architecture-specific and I worry that by trying to make this
> > code common, we run the real risk of introducing subtle bugs on some
> > architecture every time it is changed.
> "Add generic asid code" and "move arm's into generic" are two things.
> We could do
> first and let architecture's maintainer to choose.
If I understand the proposal being discussed, it involves basing that
generic ASID allocation code around the arm64 implementation which I don't
necessarily think is a good starting point.
> > Furthermore, the algorithm we use
> > on arm64 is designed to scale to large systems using DVM and may well be
> > too complex and/or sub-optimal for architectures with different system
> > topologies or TLB invalidation mechanisms.
> It's just a asid algorithm not very complex and there is a callback
> for architecture to define their
> own local hart tlb flush. Seems it has nothing with DVM or tlb
> broadcast mechanism.
I'm pleased that you think the algorithm is not very complex, but I'm also
worried that you might not have fully understood some of its finer details.
The reason I mention DVM and TLB broadcasting is because, depending on
the mechanisms in your architecture relating to those, it may be strictly
required that all concurrently running threads of a process have the same
ASID at any given point in time, or it may be that you really don't care.
If you don't care, then the arm64 allocator is over-engineered and likely
inefficient for your system. If you do care, then it's worth considering
whether a lock is sufficient around the allocator if you don't expect high
core counts. Another possibility is that you end up using only one ASID and
invalidating the local TLB on every context switch. Yet another design
would be to manage per-cpu ASID pools.
So rather than blindly copying the arm64 code, I suggest sitting down and
designing something that fits to your architecture instead. You may end up
with something that is both simpler and more efficient.
Will
Hi Guo,
On 19/06/2019 12:51, Guo Ren wrote:
> On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
>>
>>
>>
>> On 6/19/19 9:07 AM, Guo Ren wrote:
>>> Hi Julien,
>>
>> Hi,
>>
>>>
>>> You forgot CCing C-SKY folks :P
>>
>> I wasn't aware you could be interested :).
>>
>>>
>>> Move arm asid allocator code in a generic one is a agood idea, I've
>>> made a patchset for C-SKY and test is on processing, See:
>>> https://lore.kernel.org/linux-csky/[email protected]/
>>>
>>> If you plan to seperate it into generic one, I could co-work with you.
>>
>> Was the ASID allocator work out of box on C-Sky?
> Almost done, but one question:
> arm64 remove the code in switch_mm:
> cpumask_clear_cpu(cpu, mm_cpumask(prev));
> cpumask_set_cpu(cpu, mm_cpumask(next));
>
> Why? Although arm64 cache operations could affect all harts with CTC
> method of interconnect, I think we should
> keep these code for primitive integrity in linux. Because cpu_bitmap
> is in mm_struct instead of mm->context.
I will let Will answer to this.
[...]
>> If so, I can easily move the code in a generic place (maybe lib/asid.c).
> I think it's OK.
Will emits concern to move the code in lib. So I will stick with what I
currently have.
Cheers,
--
Julien Grall
On Wed, Jun 19, 2019 at 8:39 PM Will Deacon <[email protected]> wrote:
>
> On Wed, Jun 19, 2019 at 08:18:04PM +0800, Guo Ren wrote:
> > On Wed, Jun 19, 2019 at 5:12 PM Will Deacon <[email protected]> wrote:
> > > This is one place where I'd actually prefer not to go down the route of
> > > making the code generic. Context-switching and low-level TLB management
> > > is deeply architecture-specific and I worry that by trying to make this
> > > code common, we run the real risk of introducing subtle bugs on some
> > > architecture every time it is changed.
> > "Add generic asid code" and "move arm's into generic" are two things.
> > We could do
> > first and let architecture's maintainer to choose.
>
> If I understand the proposal being discussed, it involves basing that
> generic ASID allocation code around the arm64 implementation which I don't
> necessarily think is a good starting point.
...
>
> > > Furthermore, the algorithm we use
> > > on arm64 is designed to scale to large systems using DVM and may well be
> > > too complex and/or sub-optimal for architectures with different system
> > > topologies or TLB invalidation mechanisms.
> > It's just a asid algorithm not very complex and there is a callback
> > for architecture to define their
> > own local hart tlb flush. Seems it has nothing with DVM or tlb
> > broadcast mechanism.
>
> I'm pleased that you think the algorithm is not very complex, but I'm also
> worried that you might not have fully understood some of its finer details.
I understand your concern about my less understanding of asid
technology. Here is
my short-description of arm64 asid allocator: (If you find anything
wrong, please
correct me directly, thx :)
The asid allocator use five level check to reduce the cost of switch_mm.
1. Check if the asid version is the same (it's general)
2. Check reserved_asid which is set in rollover flush_context() and
the key point is
keep the same bit position with the current asid version instead
of input version.
3. Check if the position of bitmap is free then it could be set &
used directly.
4. find_next_zero_bit() (a little performance cost)
5. flush_context (this is the worst cost with increase current asid version)
Check is level by level and cost is also higher with the next level.
The design that
impressed me the most was reserved_asid and bitmap and the 2th level and 3th
level will prevent unnecessary find_next_zero_bit().
The atomic 64 bit asid is also ok for 32-bit system and it won't cost
a lot in 1th 2th 3th
level check.
The operation of set/clear mm_cpumask was removed in arm64 compared to arm32.
It seems no side effect on current arm64 system, but from software
meaning it's wrong.
So I think it should be reserved in generic version.
>
> The reason I mention DVM and TLB broadcasting is because, depending on
> the mechanisms in your architecture relating to those, it may be strictly
> required that all concurrently running threads of a process have the same
> ASID at any given point in time, or it may be that you really don't care.
>
> If you don't care, then the arm64 allocator is over-engineered and likely
> inefficient for your system. If you do care, then it's worth considering
> whether a lock is sufficient around the allocator if you don't expect high
> core counts. Another possibility is that you end up using only one ASID and
> invalidating the local TLB on every context switch. Yet another design
> would be to manage per-cpu ASID pools.
I'll keep my system use the same ASID for SMP + IOMMU :P
Yes, there are two styles of asid allocator: per-cpu ASID (MIPS) or
same ASID (ARM).
If the CPU couldn't support cache/tlb coherency maintian in hardware,
it should use
per-cpu ASID style because IPI is expensive and per-cpu ASID style
need more software
mechanism to improve performance (eg: delay cache flush). From software view the
same ASID is clearer and easier to build bigger system with more TLB caches.
I think the same ASID style is a more sensible choice for modern
processor and let it be
one of generic is reasonable.
>
> So rather than blindly copying the arm64 code, I suggest sitting down and
> designing something that fits to your architecture instead. You may end up
> with something that is both simpler and more efficient.
In fact, riscv folks have discussed a lot about arm's asid allocator
and I learned
a lot from the discussion:
https://lore.kernel.org/linux-riscv/[email protected]/
We are developing C-SKY and RISC-V ISA cpu cores and make it generic
is good for us.
Best Regards
Guo Ren
On Wed, Jun 19, 2019 at 07:51:03PM +0800, Guo Ren wrote:
> On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
> > On 6/19/19 9:07 AM, Guo Ren wrote:
> > > Move arm asid allocator code in a generic one is a agood idea, I've
> > > made a patchset for C-SKY and test is on processing, See:
> > > https://lore.kernel.org/linux-csky/[email protected]/
> > >
> > > If you plan to seperate it into generic one, I could co-work with you.
> >
> > Was the ASID allocator work out of box on C-Sky?
>
> Almost done, but one question:
> arm64 remove the code in switch_mm:
> cpumask_clear_cpu(cpu, mm_cpumask(prev));
> cpumask_set_cpu(cpu, mm_cpumask(next));
>
> Why? Although arm64 cache operations could affect all harts with CTC
> method of interconnect, I think we should keep these code for
> primitive integrity in linux. Because cpu_bitmap is in mm_struct
> instead of mm->context.
We didn't have a use for this in the arm64 code, so no point in
maintaining the mm_cpumask. On some arm32 systems (ARMv6) with no
hardware broadcast of some TLB/cache operations, we use it to track
where the task has run to issue IPI for TLB invalidation or some
deferred I-cache invalidation.
(there was also a potential optimisation on arm64 to avoid broadcast
TLBI if the task only ran on a single CPU but Will found that was rarely
the case on an SMP system because of rebalancing happening during
execve(), ending up with two bits set in the mm_cpumask)
The way you use it on csky is different from how it is done on arm. It
seems to clear the mask for the scheduled out (prev) task but this
wouldn't work on arm(64) since the TLB still contains prev entries
tagged with the scheduled out ASID. Whether it matters, I guess it
depends on the specifics of your hardware.
While the algorithm may seem fairly generic, the semantics have a few
corner cases specific to each architecture. See [1] for a description of
the semantics we need on arm64 (CnP is a feature where the hardware
threads of the same core can share the TLB; the original algorithm
violated the requirements when this feature was enabled).
BTW, if you find the algorithm fairly straightforward ;), see this
bug-fix which took a formal model to identify: a8ffaaa060b8 ("arm64:
asid: Do not replace active_asids if already 0").
[1] https://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/kernel-tla.git/tree/asidalloc.tla#n79
--
Catalin
Thx Catalin,
On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
<[email protected]> wrote:
>
> On Wed, Jun 19, 2019 at 07:51:03PM +0800, Guo Ren wrote:
> > On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
> > > On 6/19/19 9:07 AM, Guo Ren wrote:
> > > > Move arm asid allocator code in a generic one is a agood idea, I've
> > > > made a patchset for C-SKY and test is on processing, See:
> > > > https://lore.kernel.org/linux-csky/[email protected]/
> > > >
> > > > If you plan to seperate it into generic one, I could co-work with you.
> > >
> > > Was the ASID allocator work out of box on C-Sky?
> >
> > Almost done, but one question:
> > arm64 remove the code in switch_mm:
> > cpumask_clear_cpu(cpu, mm_cpumask(prev));
> > cpumask_set_cpu(cpu, mm_cpumask(next));
> >
> > Why? Although arm64 cache operations could affect all harts with CTC
> > method of interconnect, I think we should keep these code for
> > primitive integrity in linux. Because cpu_bitmap is in mm_struct
> > instead of mm->context.
>
> We didn't have a use for this in the arm64 code, so no point in
> maintaining the mm_cpumask. On some arm32 systems (ARMv6) with no
> hardware broadcast of some TLB/cache operations, we use it to track
> where the task has run to issue IPI for TLB invalidation or some
> deferred I-cache invalidation.
The operation of set/clear mm_cpumask was removed in arm64 compared to
arm32. It seems no side effect on current arm64 system, but from
software meaning it's wrong.
I think we should keep mm_cpumask just like arm32.
>
> (there was also a potential optimisation on arm64 to avoid broadcast
> TLBI if the task only ran on a single CPU but Will found that was rarely
> the case on an SMP system because of rebalancing happening during
> execve(), ending up with two bits set in the mm_cpumask)
>
> The way you use it on csky is different from how it is done on arm. It
> seems to clear the mask for the scheduled out (prev) task but this
> wouldn't work on arm(64) since the TLB still contains prev entries
> tagged with the scheduled out ASID. Whether it matters, I guess it
> depends on the specifics of your hardware.
Sorry for the mistake quote, what I mean is what is done in arm32:
clear all bits of mm->cpu_mask in new_context(), and set back one by
one. Here is my patch:
https://lore.kernel.org/linux-csky/CAJF2gTQ0xQtQY1t-g9FgWaxfDXppMkFooCQzTFy7+ouwUfyA6w@mail.gmail.com/T/#m2ed464d2dfb45ac6f5547fb3936adf2da456cb65
>
> While the algorithm may seem fairly generic, the semantics have a few
> corner cases specific to each architecture. See [1] for a description of
> the semantics we need on arm64 (CnP is a feature where the hardware
> threads of the same core can share the TLB; the original algorithm
> violated the requirements when this feature was enabled).
C-SKY SMP is only one hart per core, but here is a patch [1] with my
thought on SMT duplicate tlb flush:
[1] https://lore.kernel.org/linux-csky/[email protected]/T/#u
For TLA+ model, I still need some learning before I could talk with you.
>
> BTW, if you find the algorithm fairly straightforward ;), see this
> bug-fix which took a formal model to identify: a8ffaaa060b8 ("arm64:
> asid: Do not replace active_asids if already 0").
I think it's one fo the cases that other archs also could get benefit
from arm's asid allocator code.
Btw, Is this detected by arm's aisd allocator TLA+ model ? Or a real
bug report ?
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
On Mon, Jun 24, 2019 at 12:35:35AM +0800, Guo Ren wrote:
> On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
> <[email protected]> wrote:
> >
> > On Wed, Jun 19, 2019 at 07:51:03PM +0800, Guo Ren wrote:
> > > On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
> > > > On 6/19/19 9:07 AM, Guo Ren wrote:
> > > > > Move arm asid allocator code in a generic one is a agood idea, I've
> > > > > made a patchset for C-SKY and test is on processing, See:
> > > > > https://lore.kernel.org/linux-csky/[email protected]/
> > > > >
> > > > > If you plan to seperate it into generic one, I could co-work with you.
> > > >
> > > > Was the ASID allocator work out of box on C-Sky?
> > >
> > > Almost done, but one question:
> > > arm64 remove the code in switch_mm:
> > > cpumask_clear_cpu(cpu, mm_cpumask(prev));
> > > cpumask_set_cpu(cpu, mm_cpumask(next));
> > >
> > > Why? Although arm64 cache operations could affect all harts with CTC
> > > method of interconnect, I think we should keep these code for
> > > primitive integrity in linux. Because cpu_bitmap is in mm_struct
> > > instead of mm->context.
> >
> > We didn't have a use for this in the arm64 code, so no point in
> > maintaining the mm_cpumask. On some arm32 systems (ARMv6) with no
> > hardware broadcast of some TLB/cache operations, we use it to track
> > where the task has run to issue IPI for TLB invalidation or some
> > deferred I-cache invalidation.
> The operation of set/clear mm_cpumask was removed in arm64 compared to
> arm32. It seems no side effect on current arm64 system, but from
> software meaning it's wrong.
> I think we should keep mm_cpumask just like arm32.
It was a while ago now, but I remember the atomic update of the mm_cpumask
being quite expensive when I was profiling this stuff, so I removed it
because we don't need it for arm64 (at least, it doesn't allow us to
optimise our shootdowns in practice).
I still think this is over-engineered for what you want on c-sky and making
this code generic is a mistake.
Will
On Thu, Jun 20, 2019 at 05:33:03PM +0800, Guo Ren wrote:
> On Wed, Jun 19, 2019 at 8:39 PM Will Deacon <[email protected]> wrote:
> >
> > On Wed, Jun 19, 2019 at 08:18:04PM +0800, Guo Ren wrote:
> > > On Wed, Jun 19, 2019 at 5:12 PM Will Deacon <[email protected]> wrote:
> > > > This is one place where I'd actually prefer not to go down the route of
> > > > making the code generic. Context-switching and low-level TLB management
> > > > is deeply architecture-specific and I worry that by trying to make this
> > > > code common, we run the real risk of introducing subtle bugs on some
> > > > architecture every time it is changed.
> > > "Add generic asid code" and "move arm's into generic" are two things.
> > > We could do
> > > first and let architecture's maintainer to choose.
> >
> > If I understand the proposal being discussed, it involves basing that
> > generic ASID allocation code around the arm64 implementation which I don't
> > necessarily think is a good starting point.
> ...
> >
> > > > Furthermore, the algorithm we use
> > > > on arm64 is designed to scale to large systems using DVM and may well be
> > > > too complex and/or sub-optimal for architectures with different system
> > > > topologies or TLB invalidation mechanisms.
> > > It's just a asid algorithm not very complex and there is a callback
> > > for architecture to define their
> > > own local hart tlb flush. Seems it has nothing with DVM or tlb
> > > broadcast mechanism.
> >
> > I'm pleased that you think the algorithm is not very complex, but I'm also
> > worried that you might not have fully understood some of its finer details.
> I understand your concern about my less understanding of asid
> technology. Here is
> my short-description of arm64 asid allocator: (If you find anything
> wrong, please
> correct me directly, thx :)
The complexity mainly comes from the fact that this thing runs concurrently
with itself without synchronization on the fast-path. Coupled with the
need to use the same ASID for all threads of a task, you end up in fiddly
situations where rollover can occur on one CPU whilst another CPU is trying
to schedule a thread of a task that already has threads running in
userspace.
However, it's architecture-specific whether or not you care about that
scenario.
> > The reason I mention DVM and TLB broadcasting is because, depending on
> > the mechanisms in your architecture relating to those, it may be strictly
> > required that all concurrently running threads of a process have the same
> > ASID at any given point in time, or it may be that you really don't care.
> >
> > If you don't care, then the arm64 allocator is over-engineered and likely
> > inefficient for your system. If you do care, then it's worth considering
> > whether a lock is sufficient around the allocator if you don't expect high
> > core counts. Another possibility is that you end up using only one ASID and
> > invalidating the local TLB on every context switch. Yet another design
> > would be to manage per-cpu ASID pools.
> I'll keep my system use the same ASID for SMP + IOMMU :P
You will want a separate allocator for that:
https://lkml.kernel.org/r/[email protected]
> Yes, there are two styles of asid allocator: per-cpu ASID (MIPS) or
> same ASID (ARM).
> If the CPU couldn't support cache/tlb coherency maintian in hardware,
> it should use
> per-cpu ASID style because IPI is expensive and per-cpu ASID style
> need more software
> mechanism to improve performance (eg: delay cache flush). From software view the
> same ASID is clearer and easier to build bigger system with more TLB caches.
>
> I think the same ASID style is a more sensible choice for modern
> processor and let it be
> one of generic is reasonable.
I'm not sure I agree. x86, for example, is better off using a different
algorithm for allocating its PCIDs.
> > So rather than blindly copying the arm64 code, I suggest sitting down and
> > designing something that fits to your architecture instead. You may end up
> > with something that is both simpler and more efficient.
> In fact, riscv folks have discussed a lot about arm's asid allocator
> and I learned
> a lot from the discussion:
> https://lore.kernel.org/linux-riscv/[email protected]/
If you require all threads of the same process to have the same ASID, then
that patch looks broken to me.
Will
On Mon, Jun 24, 2019 at 12:35:35AM +0800, Guo Ren wrote:
> On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
> <[email protected]> wrote:
> > BTW, if you find the algorithm fairly straightforward ;), see this
> > bug-fix which took a formal model to identify: a8ffaaa060b8 ("arm64:
> > asid: Do not replace active_asids if already 0").
[...]
> Btw, Is this detected by arm's aisd allocator TLA+ model ? Or a real
> bug report ?
This specific bug was found by the TLA+ model checker (at the time we
were actually tracking down another bug with multi-threaded CPU sharing
the TLB, bug also confirmed by the formal model).
--
Catalin
On Mon, 24 Jun 2019 03:40:07 PDT (-0700), [email protected] wrote:
> On Thu, Jun 20, 2019 at 05:33:03PM +0800, Guo Ren wrote:
>> On Wed, Jun 19, 2019 at 8:39 PM Will Deacon <[email protected]> wrote:
>> >
>> > On Wed, Jun 19, 2019 at 08:18:04PM +0800, Guo Ren wrote:
>> > > On Wed, Jun 19, 2019 at 5:12 PM Will Deacon <[email protected]> wrote:
>> > > > This is one place where I'd actually prefer not to go down the route of
>> > > > making the code generic. Context-switching and low-level TLB management
>> > > > is deeply architecture-specific and I worry that by trying to make this
>> > > > code common, we run the real risk of introducing subtle bugs on some
>> > > > architecture every time it is changed.
>> > > "Add generic asid code" and "move arm's into generic" are two things.
>> > > We could do
>> > > first and let architecture's maintainer to choose.
>> >
>> > If I understand the proposal being discussed, it involves basing that
>> > generic ASID allocation code around the arm64 implementation which I don't
>> > necessarily think is a good starting point.
>> ...
>> >
>> > > > Furthermore, the algorithm we use
>> > > > on arm64 is designed to scale to large systems using DVM and may well be
>> > > > too complex and/or sub-optimal for architectures with different system
>> > > > topologies or TLB invalidation mechanisms.
>> > > It's just a asid algorithm not very complex and there is a callback
>> > > for architecture to define their
>> > > own local hart tlb flush. Seems it has nothing with DVM or tlb
>> > > broadcast mechanism.
>> >
>> > I'm pleased that you think the algorithm is not very complex, but I'm also
>> > worried that you might not have fully understood some of its finer details.
>> I understand your concern about my less understanding of asid
>> technology. Here is
>> my short-description of arm64 asid allocator: (If you find anything
>> wrong, please
>> correct me directly, thx :)
>
> The complexity mainly comes from the fact that this thing runs concurrently
> with itself without synchronization on the fast-path. Coupled with the
> need to use the same ASID for all threads of a task, you end up in fiddly
> situations where rollover can occur on one CPU whilst another CPU is trying
> to schedule a thread of a task that already has threads running in
> userspace.
>
> However, it's architecture-specific whether or not you care about that
> scenario.
>
>> > The reason I mention DVM and TLB broadcasting is because, depending on
>> > the mechanisms in your architecture relating to those, it may be strictly
>> > required that all concurrently running threads of a process have the same
>> > ASID at any given point in time, or it may be that you really don't care.
>> >
>> > If you don't care, then the arm64 allocator is over-engineered and likely
>> > inefficient for your system. If you do care, then it's worth considering
>> > whether a lock is sufficient around the allocator if you don't expect high
>> > core counts. Another possibility is that you end up using only one ASID and
>> > invalidating the local TLB on every context switch. Yet another design
>> > would be to manage per-cpu ASID pools.
FWIW: right now we don't have any implementations that support ASIDs, so we're
really not ready to make these sort of decisions because we just don't know
what systems are going to look like. While it's a fun intellectual exercise to
try to design an allocator that would work acceptably on systems of various
shapes, there's no way to test this for performance or correctness right now so
I wouldn't be comfortable taking anything. If you're really interested, the
right place to start is the RTL
https://github.com/chipsalliance/rocket-chip/blob/master/src/main/scala/rocket/TLB.scala#L19
This is essentially the same problem we have for our spinlocks -- maybe start
with the TLB before doing a whole new pipeline, though :)
>> I'll keep my system use the same ASID for SMP + IOMMU :P
>
> You will want a separate allocator for that:
>
> https://lkml.kernel.org/r/[email protected]
>
>> Yes, there are two styles of asid allocator: per-cpu ASID (MIPS) or
>> same ASID (ARM).
>> If the CPU couldn't support cache/tlb coherency maintian in hardware,
>> it should use
>> per-cpu ASID style because IPI is expensive and per-cpu ASID style
>> need more software
>> mechanism to improve performance (eg: delay cache flush). From software view the
>> same ASID is clearer and easier to build bigger system with more TLB caches.
>>
>> I think the same ASID style is a more sensible choice for modern
>> processor and let it be
>> one of generic is reasonable.
>
> I'm not sure I agree. x86, for example, is better off using a different
> algorithm for allocating its PCIDs.
>
>> > So rather than blindly copying the arm64 code, I suggest sitting down and
>> > designing something that fits to your architecture instead. You may end up
>> > with something that is both simpler and more efficient.
>> In fact, riscv folks have discussed a lot about arm's asid allocator
>> and I learned
>> a lot from the discussion:
>> https://lore.kernel.org/linux-riscv/[email protected]/
>
> If you require all threads of the same process to have the same ASID, then
> that patch looks broken to me.
>
> Will
On 6/24/19 7:22 PM, Will Deacon wrote:
> On Mon, Jun 24, 2019 at 12:35:35AM +0800, Guo Ren wrote:
>> On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
>> <[email protected]> wrote:
>>> On Wed, Jun 19, 2019 at 07:51:03PM +0800, Guo Ren wrote:
>>>> On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
>>>>> On 6/19/19 9:07 AM, Guo Ren wrote:
>>>>>> Move arm asid allocator code in a generic one is a agood idea, I've
>>>>>> made a patchset for C-SKY and test is on processing, See:
>>>>>> https://lore.kernel.org/linux-csky/[email protected]/
>>>>>>
>>>>>> If you plan to seperate it into generic one, I could co-work with you.
>>>>> Was the ASID allocator work out of box on C-Sky?
>>>> Almost done, but one question:
>>>> arm64 remove the code in switch_mm:
>>>> cpumask_clear_cpu(cpu, mm_cpumask(prev));
>>>> cpumask_set_cpu(cpu, mm_cpumask(next));
>>>>
>>>> Why? Although arm64 cache operations could affect all harts with CTC
>>>> method of interconnect, I think we should keep these code for
>>>> primitive integrity in linux. Because cpu_bitmap is in mm_struct
>>>> instead of mm->context.
>>> We didn't have a use for this in the arm64 code, so no point in
>>> maintaining the mm_cpumask. On some arm32 systems (ARMv6) with no
>>> hardware broadcast of some TLB/cache operations, we use it to track
>>> where the task has run to issue IPI for TLB invalidation or some
>>> deferred I-cache invalidation.
>> The operation of set/clear mm_cpumask was removed in arm64 compared to
>> arm32. It seems no side effect on current arm64 system, but from
>> software meaning it's wrong.
>> I think we should keep mm_cpumask just like arm32.
> It was a while ago now, but I remember the atomic update of the mm_cpumask
> being quite expensive when I was profiling this stuff, so I removed it
> because we don't need it for arm64 (at least, it doesn't allow us to
> optimise our shootdowns in practice).
Hi Will,
I think mm_cpumask can be used for filtering the cpus that there are TBL
entries on.
The OS jitter can be reduced by invalidating TLB entries only on the
CPUs specified by mm_cpumask(mm).
As I mentioned in an earlier email, the 2.5% OS jitter can result in
over a factor of 20 slowdown for the same application [1].
Though it may be an extreme example, reducing the OS jitter has been an
issue in HPC environment.
I would like to avoid broadcast TLBI by using mm_cpumask on arm64, cloud
you please tell me more about the costs caused by updating mm_cpumask?
Here is my patch:
https://lkml.org/lkml/2019/6/17/703
[1] Ferreira, Kurt B., Patrick Bridges, and Ron Brightwell.
"Characterizing application sensitivity to OS interference using
kernel-level noise injection." Proceedings of the 2008 ACM/IEEE
conference on Supercomputing. IEEE Press, 2008.
Thanks,
QI Fuli
> I still think this is over-engineered for what you want on c-sky and making
> this code generic is a mistake.
>
> Will
On Thu, Jun 27, 2019 at 09:41:42AM +0000, [email protected] wrote:
>
> On 6/24/19 7:22 PM, Will Deacon wrote:
> > On Mon, Jun 24, 2019 at 12:35:35AM +0800, Guo Ren wrote:
> >> On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
> >> <[email protected]> wrote:
> >>> On Wed, Jun 19, 2019 at 07:51:03PM +0800, Guo Ren wrote:
> >>>> On Wed, Jun 19, 2019 at 4:54 PM Julien Grall <[email protected]> wrote:
> >>>>> On 6/19/19 9:07 AM, Guo Ren wrote:
> >>>>>> Move arm asid allocator code in a generic one is a agood idea, I've
> >>>>>> made a patchset for C-SKY and test is on processing, See:
> >>>>>> https://lore.kernel.org/linux-csky/[email protected]/
> >>>>>>
> >>>>>> If you plan to seperate it into generic one, I could co-work with you.
> >>>>> Was the ASID allocator work out of box on C-Sky?
> >>>> Almost done, but one question:
> >>>> arm64 remove the code in switch_mm:
> >>>> cpumask_clear_cpu(cpu, mm_cpumask(prev));
> >>>> cpumask_set_cpu(cpu, mm_cpumask(next));
> >>>>
> >>>> Why? Although arm64 cache operations could affect all harts with CTC
> >>>> method of interconnect, I think we should keep these code for
> >>>> primitive integrity in linux. Because cpu_bitmap is in mm_struct
> >>>> instead of mm->context.
> >>> We didn't have a use for this in the arm64 code, so no point in
> >>> maintaining the mm_cpumask. On some arm32 systems (ARMv6) with no
> >>> hardware broadcast of some TLB/cache operations, we use it to track
> >>> where the task has run to issue IPI for TLB invalidation or some
> >>> deferred I-cache invalidation.
> >> The operation of set/clear mm_cpumask was removed in arm64 compared to
> >> arm32. It seems no side effect on current arm64 system, but from
> >> software meaning it's wrong.
> >> I think we should keep mm_cpumask just like arm32.
> > It was a while ago now, but I remember the atomic update of the mm_cpumask
> > being quite expensive when I was profiling this stuff, so I removed it
> > because we don't need it for arm64 (at least, it doesn't allow us to
> > optimise our shootdowns in practice).
>
> I think mm_cpumask can be used for filtering the cpus that there are TBL
> entries on.
I'm aware that you want to use IPIs for broadcasting TLB invalidation
but that is only tangentially related to this thread, which is about the
current ASID algorithm and the need to update mm_cpumask today.
Please don't conflate the two threads; I already made my position reasonably
clear:
https://lore.kernel.org/linux-arm-kernel/[email protected]/
I will follow-up with another reply there.
Will
Hi Marinas,
Thx for the reply
On Mon, Jun 24, 2019 at 11:38 PM Catalin Marinas
<[email protected]> wrote:
>
> On Mon, Jun 24, 2019 at 12:35:35AM +0800, Guo Ren wrote:
> > On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
> > <[email protected]> wrote:
> > > BTW, if you find the algorithm fairly straightforward ;), see this
> > > bug-fix which took a formal model to identify: a8ffaaa060b8 ("arm64:
> > > asid: Do not replace active_asids if already 0").
> [...]
> > Btw, Is this detected by arm's aisd allocator TLA+ model ? Or a real
> > bug report ?
>
> This specific bug was found by the TLA+ model checker (at the time we
> were actually tracking down another bug with multi-threaded CPU sharing
> the TLB, bug also confirmed by the formal model).
Could you tell me the ref-link about "another bug with multi-threaded
CPU sharing the TLB" ?
In my concept, the multi-core asid mechanism is also applicable to
multi-thread shared TLB, but it will generate redundant tlbflush. From
the software design logic, multi-threaded is treated as multi-cores
without error, but performance is not optimized. So in my RFC PATCH:
[1], I try to reduce multi-threads' tlbflush in one CPU core with the
fixed cpu ID bitmap hypothesis.
1: https://lore.kernel.org/linux-csky/CAJF2gTQ0xQtQY1t-g9FgWaxfDXppMkFooCQzTFy7+ouwUfyA6w@mail.gmail.com/T/#m2ed464d2dfb45ac6f5547fb3936adf2da456cb65
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
On Sun, Jun 30, 2019 at 12:29:46PM +0800, Guo Ren wrote:
> On Mon, Jun 24, 2019 at 11:38 PM Catalin Marinas
> <[email protected]> wrote:
> > On Mon, Jun 24, 2019 at 12:35:35AM +0800, Guo Ren wrote:
> > > On Fri, Jun 21, 2019 at 10:16 PM Catalin Marinas
> > > <[email protected]> wrote:
> > > > BTW, if you find the algorithm fairly straightforward ;), see this
> > > > bug-fix which took a formal model to identify: a8ffaaa060b8 ("arm64:
> > > > asid: Do not replace active_asids if already 0").
> > [...]
> > > Btw, Is this detected by arm's aisd allocator TLA+ model ? Or a real
> > > bug report ?
> >
> > This specific bug was found by the TLA+ model checker (at the time we
> > were actually tracking down another bug with multi-threaded CPU sharing
> > the TLB, bug also confirmed by the formal model).
>
> Could you tell me the ref-link about "another bug with multi-threaded
> CPU sharing the TLB" ?
>
> In my concept, the multi-core asid mechanism is also applicable to
> multi-thread shared TLB, but it will generate redundant tlbflush. From
> the software design logic, multi-threaded is treated as multi-cores
> without error, but performance is not optimized.
From the ASID reservation/allocation perspective, the mechanism is the
same between multi-threaded with a shared TLB and multi-core. On arm64,
a local_flush_tlb_all() on a thread invalidates the TLB for the other
threads of the same core.
The actual problem with multi-threaded CPUs is a lot more subtle.
Digging some internal email from 1.5 years ago and pasting it below
(where "current ASID algorithm" refers to the one prior to the fix and
CnP - Common Not Private - means shared TLBs on a multi-threaded CPU):
The current ASID roll-over algorithm allows for a small window where
active_asids for a CPU (P1) is different from the actual ASID in TTBR0.
This can lead to a roll-over on a different CPU (P2) allocating an ASID
(for a different task) which is still hardware-active on P1.
A TLBI on a CPU (or a peer CPU with CnP) does not guarantee that all the
entries corresponding to a valid TTBRx are removed as they can still be
speculatively loaded immediately after TLBI.
While having two different page tables with the same ASID on different
CPUs should be fine without CnP, it becomes problematic when CnP is
enabled:
P1 P2
-- --
TTBR0.BADDR = T1
TTBR0.ASID = A1
check_and_switch_context(T2,A2)
asid_maps[P1] = A2
goto fastpath
check_and_switch_context(T3,A0)
new_context
ASID roll-over allocates A1
since it is not active
TLBI ALL
speculate TTBR0.ASID = A1 entry
TTBR0.BADDR = T3
TTBR0.ASID = A1
TTBR0.BADDR = T2
TTBR0.ASID = A2
After this, the common TLB on P1 and P2 (CnP) contains entries
corresponding to the old T1 and A1. Task T3 using the same ASID A1 can
hit such entries. (T1,A1) will eventually be removed from the TLB on the
next context switch on P1 since tlb_flush_pending was set but this is
not guaranteed to happen.
The fix on arm64 (as part of 5ffdfaedfa0a - "arm64: mm: Support Common
Not Private translations") was to set the reserved TTBR0 in
check_and_switch_context(), preventing speculative loads into the TLB
being tagged with the wrong ASID. So this is specific to the ARM CPUs
behaviour w.r.t. speculative TLB loads, it may not be the case (yet) for
your architecture.
--
Catalin
Hello Catalin,
Thanks for sharing about CnP assid experience. See my comment below.
On Mon, Jul 1, 2019 at 5:17 PM Catalin Marinas
> From the ASID reservation/allocation perspective, the mechanism is the
> same between multi-threaded with a shared TLB and multi-core. On arm64,
> a local_flush_tlb_all() on a thread invalidates the TLB for the other
> threads of the same core.
>
> The actual problem with multi-threaded CPUs is a lot more subtle.
> Digging some internal email from 1.5 years ago and pasting it below
> (where "current ASID algorithm" refers to the one prior to the fix and
> CnP - Common Not Private - means shared TLBs on a multi-threaded CPU):
>
>
> The current ASID roll-over algorithm allows for a small window where
> active_asids for a CPU (P1) is different from the actual ASID in TTBR0.
> This can lead to a roll-over on a different CPU (P2) allocating an ASID
> (for a different task) which is still hardware-active on P1.
>
> A TLBI on a CPU (or a peer CPU with CnP) does not guarantee that all the
> entries corresponding to a valid TTBRx are removed as they can still be
> speculatively loaded immediately after TLBI.
>
> While having two different page tables with the same ASID on different
> CPUs should be fine without CnP, it becomes problematic when CnP is
> enabled:
>
> P1 P2
> -- --
> TTBR0.BADDR = T1
> TTBR0.ASID = A1
> check_and_switch_context(T2,A2)
> asid_maps[P1] = A2
> goto fastpath
> check_and_switch_context(T3,A0)
> new_context
> ASID roll-over allocates A1
> since it is not active
> TLBI ALL
> speculate TTBR0.ASID = A1 entry
> TTBR0.BADDR = T3
> TTBR0.ASID = A1
> TTBR0.BADDR = T2
> TTBR0.ASID = A2
>
> After this, the common TLB on P1 and P2 (CnP) contains entries
> corresponding to the old T1 and A1. Task T3 using the same ASID A1 can
> hit such entries. (T1,A1) will eventually be removed from the TLB on the
> next context switch on P1 since tlb_flush_pending was set but this is
> not guaranteed to happen.
>
>
> The fix on arm64 (as part of 5ffdfaedfa0a - "arm64: mm: Support Common
> Not Private translations") was to set the reserved TTBR0 in
> check_and_switch_context(), preventing speculative loads into the TLB
> being tagged with the wrong ASID. So this is specific to the ARM CPUs
> behaviour w.r.t. speculative TLB loads, it may not be the case (yet) for
> your architecture.
The most important thing is that TLBI ALL occurs between
"asid_maps[P1] = A2" and "TTBR0.BADDR = T2", then speculative
execution after TLBI which access to user space code/data will result
in a valid asid entry which re-filled into the TLB by PTW.
A similar problem should exist if C-SKY ISA supports SMT. Although the
C-SKY kernel prohibits the kernel from speculating on user space code
directly, ld/st can access user space memory in csky kernel mode.
Therefore, a similar problem occurs when it speculatively executes
copy_from / to_user codes in that window.
RISC-V ISA has a SUM setting bit that prevents the kernel from
speculating access to user space. So this problem has been bypassed
from the design.
I saw arm64 to prevent speculation by temporarily setting TTBR0.el1 to
a zero page table. Is that used to prevent speculative execution user
space code or just prevent ld/st in copy_use_* ?
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
On Tue, Jul 16, 2019 at 11:31:27AM +0800, Guo Ren wrote:
> I saw arm64 to prevent speculation by temporarily setting TTBR0.el1 to
> a zero page table. Is that used to prevent speculative execution user
> space code or just prevent ld/st in copy_use_* ?
Only to prevent explicit ld/st from user. On ARMv8.1+, we don't normally
use the TTBR0 trick but rather disable user space access using the PAN
(privileged access never) feature. However, I don't think PAN disables
speculative accesses, only explicit loads/stores. Also, with ARMv8.2
Linux uses the LDTR/STTR instructions in copy_*_user() which don't need
to disable PAN explicitly.
--
Catalin
Thx Will,
On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
> > I'll keep my system use the same ASID for SMP + IOMMU :P
>
> You will want a separate allocator for that:
>
> https://lkml.kernel.org/r/[email protected]
Yes, it is hard to maintain ASID between IOMMU and CPUMMU or different
system, because it's difficult to synchronize the IO_ASID when the CPU
ASID is rollover.
But we could still use hardware broadcast TLB invalidation instruction
to uniformly manage the ASID and IO_ASID, or OTHER_ASID in our IOMMU.
Welcome to join our disscusion:
"Introduce an implementation of IOMMU in linux-riscv"
9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V MC
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
Thx Will for reply.
On Thu, Sep 12, 2019 at 3:03 PM Will Deacon <[email protected]> wrote:
>
> On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
> > On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
> > > > I'll keep my system use the same ASID for SMP + IOMMU :P
> > >
> > > You will want a separate allocator for that:
> > >
> > > https://lkml.kernel.org/r/[email protected]
> >
> > Yes, it is hard to maintain ASID between IOMMU and CPUMMU or different
> > system, because it's difficult to synchronize the IO_ASID when the CPU
> > ASID is rollover.
> > But we could still use hardware broadcast TLB invalidation instruction
> > to uniformly manage the ASID and IO_ASID, or OTHER_ASID in our IOMMU.
>
> That's probably a bad idea, because you'll likely stall execution on the
> CPU until the IOTLB has completed invalidation. In the case of ATS, I think
> an endpoint ATC is permitted to take over a minute to respond. In reality, I
> suspect the worst you'll ever see would be in the msec range, but that's
> still an unacceptable period of time to hold a CPU.
Just as I've said in the session that IOTLB invalidate delay is
another topic, My main proposal is to introduce stage1.pgd and
stage2.pgd as address space identifiers between different TLB systems
based on vmid, asid. My last part of sildes will show you how to
translate stage1/2.pgd to as/vmid in PCI ATS system and the method
could work with SMMU-v3 and intel Vt-d. (It's regret for me there is
no time to show you the whole slides.)
In our light IOMMU implementation, there's no IOTLB invalidate delay
problem. Becasue IOMMU is very close to CPU MMU and interconnect's
delay is the same with SMP CPUs MMU (no PCI, VM supported).
To solve the problem, we could define a async mode in sfence.vma.b to
slove the problem and finished with per_cpu_irq/exception.
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
> On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
> > > I'll keep my system use the same ASID for SMP + IOMMU :P
> >
> > You will want a separate allocator for that:
> >
> > https://lkml.kernel.org/r/[email protected]
>
> Yes, it is hard to maintain ASID between IOMMU and CPUMMU or different
> system, because it's difficult to synchronize the IO_ASID when the CPU
> ASID is rollover.
> But we could still use hardware broadcast TLB invalidation instruction
> to uniformly manage the ASID and IO_ASID, or OTHER_ASID in our IOMMU.
That's probably a bad idea, because you'll likely stall execution on the
CPU until the IOTLB has completed invalidation. In the case of ATS, I think
an endpoint ATC is permitted to take over a minute to respond. In reality, I
suspect the worst you'll ever see would be in the msec range, but that's
still an unacceptable period of time to hold a CPU.
> Welcome to join our disscusion:
> "Introduce an implementation of IOMMU in linux-riscv"
> 9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V MC
I attended this session, but it unfortunately raised many more questions
than it answered.
Will
Here is the presentation, any comments is welcome.
https://docs.google.com/presentation/d/1sc295JznVAfDIPieAqzjcyUkcHnNFQsK8FFqdoCY854/edit?usp=sharing
On Fri, Sep 13, 2019 at 3:13 PM Guo Ren <[email protected]> wrote:
>
> Another idea is seperate remote TLB invalidate into two instructions:
>
> - sfence.vma.b.asyc
> - sfence.vma.b.barrier // wait all async TLB invalidate operations finished for all harts.
>
> (I remember who mentioned me separate them into two instructions after session. Anup? Is the idea right ?)
>
> Actually, I never consider asyc TLB invalidate before, because current our light iommu did not need it.
>
> Thx all people attend the session :) Let's continue the talk.
>
>
> Guo Ren <[email protected]> 于 2019年9月12日周四 22:59写道:
>>
>> Thx Will for reply.
>>
>> On Thu, Sep 12, 2019 at 3:03 PM Will Deacon <[email protected]> wrote:
>> >
>> > On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
>> > > On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
>> > > > > I'll keep my system use the same ASID for SMP + IOMMU :P
>> > > >
>> > > > You will want a separate allocator for that:
>> > > >
>> > > > https://lkml.kernel.org/r/[email protected]
>> > >
>> > > Yes, it is hard to maintain ASID between IOMMU and CPUMMU or different
>> > > system, because it's difficult to synchronize the IO_ASID when the CPU
>> > > ASID is rollover.
>> > > But we could still use hardware broadcast TLB invalidation instruction
>> > > to uniformly manage the ASID and IO_ASID, or OTHER_ASID in our IOMMU.
>> >
>> > That's probably a bad idea, because you'll likely stall execution on the
>> > CPU until the IOTLB has completed invalidation. In the case of ATS, I think
>> > an endpoint ATC is permitted to take over a minute to respond. In reality, I
>> > suspect the worst you'll ever see would be in the msec range, but that's
>> > still an unacceptable period of time to hold a CPU.
>> Just as I've said in the session that IOTLB invalidate delay is
>> another topic, My main proposal is to introduce stage1.pgd and
>> stage2.pgd as address space identifiers between different TLB systems
>> based on vmid, asid. My last part of sildes will show you how to
>> translate stage1/2.pgd to as/vmid in PCI ATS system and the method
>> could work with SMMU-v3 and intel Vt-d. (It's regret for me there is
>> no time to show you the whole slides.)
>>
>> In our light IOMMU implementation, there's no IOTLB invalidate delay
>> problem. Becasue IOMMU is very close to CPU MMU and interconnect's
>> delay is the same with SMP CPUs MMU (no PCI, VM supported).
>>
>> To solve the problem, we could define a async mode in sfence.vma.b to
>> slove the problem and finished with per_cpu_irq/exception.
>>
>> --
>> Best Regards
>> Guo Ren
>>
>> ML: https://lore.kernel.org/linux-csky/
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
On Thu, 12 Sep 2019 07:02:56 PDT (-0700), [email protected] wrote:
> On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
>> On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
>> > > I'll keep my system use the same ASID for SMP + IOMMU :P
>> >
>> > You will want a separate allocator for that:
>> >
>> > https://lkml.kernel.org/r/[email protected]
>>
>> Yes, it is hard to maintain ASID between IOMMU and CPUMMU or different
>> system, because it's difficult to synchronize the IO_ASID when the CPU
>> ASID is rollover.
>> But we could still use hardware broadcast TLB invalidation instruction
>> to uniformly manage the ASID and IO_ASID, or OTHER_ASID in our IOMMU.
>
> That's probably a bad idea, because you'll likely stall execution on the
> CPU until the IOTLB has completed invalidation. In the case of ATS, I think
> an endpoint ATC is permitted to take over a minute to respond. In reality, I
> suspect the worst you'll ever see would be in the msec range, but that's
> still an unacceptable period of time to hold a CPU.
>
>> Welcome to join our disscusion:
>> "Introduce an implementation of IOMMU in linux-riscv"
>> 9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V MC
>
> I attended this session, but it unfortunately raised many more questions
> than it answered.
Ya, we're a long way from figuring this out.
> -----Original Message-----
> From: [email protected] <linux-kernel-
> [email protected]> On Behalf Of Palmer Dabbelt
> Sent: Saturday, September 14, 2019 7:31 PM
> To: [email protected]
> Cc: [email protected]; Will Deacon <[email protected]>;
> [email protected]; [email protected]; [email protected];
> Arnd Bergmann <[email protected]>; [email protected];
> [email protected]; [email protected]; Anup Patel
> <[email protected]>; [email protected];
> [email protected]; Christoph Hellwig <[email protected]>; Atish Patra
> <[email protected]>; [email protected]; [email protected]; Paul
> Walmsley <[email protected]>; [email protected]; linux-
> [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]
> Subject: Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a
> separate file
>
> On Thu, 12 Sep 2019 07:02:56 PDT (-0700), [email protected] wrote:
> > On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
> >> On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
> >> > > I'll keep my system use the same ASID for SMP + IOMMU :P
> >> >
> >> > You will want a separate allocator for that:
> >> >
> >> > https://lkml.kernel.org/r/20190610184714.6786-2-jean-philippe.bruck
> >> > [email protected]
> >>
> >> Yes, it is hard to maintain ASID between IOMMU and CPUMMU or
> >> different system, because it's difficult to synchronize the IO_ASID
> >> when the CPU ASID is rollover.
> >> But we could still use hardware broadcast TLB invalidation
> >> instruction to uniformly manage the ASID and IO_ASID, or OTHER_ASID in
> our IOMMU.
> >
> > That's probably a bad idea, because you'll likely stall execution on
> > the CPU until the IOTLB has completed invalidation. In the case of
> > ATS, I think an endpoint ATC is permitted to take over a minute to
> > respond. In reality, I suspect the worst you'll ever see would be in
> > the msec range, but that's still an unacceptable period of time to hold a
> CPU.
> >
> >> Welcome to join our disscusion:
> >> "Introduce an implementation of IOMMU in linux-riscv"
> >> 9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V MC
> >
> > I attended this session, but it unfortunately raised many more
> > questions than it answered.
>
> Ya, we're a long way from figuring this out.
For everyone's reference, here is our first attempt at RISC-V ASID allocator:
http://archive.lwn.net:8080/linux-kernel/[email protected]/T/#u
Regards,
Anup
Hi,
On 13/09/2019 09:13, Guo Ren wrote:
> Another idea is seperate remote TLB invalidate into two instructions:
>
> - sfence.vma.b.asyc
> - sfence.vma.b.barrier // wait all async TLB invalidate operations
> finished for all harts.
It's not clear to me how this helps, but I probably don't have the whole
picture. If you have a place where it is safe to wait for the barrier to
complete, why not do the whole invalidate there?
> (I remember who mentioned me separate them into two instructions after
> session. Anup? Is the idea right ?)
>
> Actually, I never consider asyc TLB invalidate before, because current our
> light iommu did not need it.
>
> Thx all people attend the session :) Let's continue the talk.
>
>
> Guo Ren <[email protected] <mailto:[email protected]>> 于 2019年9月12日周
> 四 22:59写道:
>
> Thx Will for reply.
>
> On Thu, Sep 12, 2019 at 3:03 PM Will Deacon <[email protected]
> <mailto:[email protected]>> wrote:
> >
> > On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
> > > On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]
> <mailto:[email protected]>> wrote:
> > > > > I'll keep my system use the same ASID for SMP + IOMMU :P
> > > >
> > > > You will want a separate allocator for that:
> > > >
> > > >
> https://lkml.kernel.org/r/[email protected]
> > >
> > > Yes, it is hard to maintain ASID between IOMMU and CPUMMU or different
> > > system, because it's difficult to synchronize the IO_ASID when the CPU
> > > ASID is rollover.
> > > But we could still use hardware broadcast TLB invalidation instruction
> > > to uniformly manage the ASID and IO_ASID, or OTHER_ASID in our IOMMU.
> >
> > That's probably a bad idea, because you'll likely stall execution on the
> > CPU until the IOTLB has completed invalidation. In the case of ATS,
> I think
> > an endpoint ATC is permitted to take over a minute to respond. In
> reality, I
> > suspect the worst you'll ever see would be in the msec range, but that's
> > still an unacceptable period of time to hold a CPU.
> Just as I've said in the session that IOTLB invalidate delay is
> another topic, My main proposal is to introduce stage1.pgd and
> stage2.pgd as address space identifiers between different TLB systems
> based on vmid, asid. My last part of sildes will show you how to
> translate stage1/2.pgd to as/vmid in PCI ATS system and the method
> could work with SMMU-v3 and intel Vt-d. (It's regret for me there is
> no time to show you the whole slides.)
>
> In our light IOMMU implementation, there's no IOTLB invalidate delay
> problem. Becasue IOMMU is very close to CPU MMU and interconnect's
> delay is the same with SMP CPUs MMU (no PCI, VM supported).
>
> To solve the problem, we could define a async mode in sfence.vma.b to
> slove the problem and finished with per_cpu_irq/exception.
The solution I had to this problem is pinning the ASID [1] used by the
IOMMU, to prevent the CPU from recycling the ASID on rollover. This way
the CPU doesn't have to wait for IOMMU invalidations to complete, when
scheduling a task that might not even have anything to do with the IOMMU.
In the Arm SMMU, ASID and IOASID (PASID) are separate identifiers. IOASID
indexes an entry in the context descriptor table, which contains the ASID.
So with unpinned shared ASID you don't need to invalidate the ATC on
rollover, since the IOASID doesn't change, but you do need to modify the
context descriptor and invalidate cached versions of it.
Once you have pinned ASIDs, you could also declare that IOASID = ASID. I
don't remember finding an argument to strictly forbid it, even though ASID
and IOASID have different sizes on Arm (respectively 8/16 and 20 bits).
Thanks,
Jean
[1]
https://lore.kernel.org/linux-iommu/[email protected]/
On Mon, 16 Sep 2019 11:18:00 PDT (-0700), [email protected] wrote:
> On Sun, Sep 15, 2019 at 05:03:38AM +0000, Anup Patel wrote:
>>
>>
>> > -----Original Message-----
>> > From: [email protected] <linux-kernel-
>> > [email protected]> On Behalf Of Palmer Dabbelt
>> > Sent: Saturday, September 14, 2019 7:31 PM
>> > To: [email protected]
>> > Cc: [email protected]; Will Deacon <[email protected]>;
>> > [email protected]; [email protected]; [email protected];
>> > Arnd Bergmann <[email protected]>; [email protected];
>> > [email protected]; [email protected]; Anup Patel
>> > <[email protected]>; [email protected];
>> > [email protected]; Christoph Hellwig <[email protected]>; Atish Patra
>> > <[email protected]>; [email protected]; [email protected]; Paul
>> > Walmsley <[email protected]>; [email protected]; linux-
>> > [email protected]; [email protected]; linux-arm-
>> > [email protected]; [email protected]
>> > Subject: Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a
>> > separate file
>> >
>> > On Thu, 12 Sep 2019 07:02:56 PDT (-0700), [email protected] wrote:
>> > > On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
>> > >> On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
>> > >> > > I'll keep my system use the same ASID for SMP + IOMMU :P
>> > >> >
>> > >> > You will want a separate allocator for that:
>> > >> >
>> > >> > https://lkml.kernel.org/r/20190610184714.6786-2-jean-philippe.bruck
>> > >> > [email protected]
>> > >>
>> > >> Yes, it is hard to maintain ASID between IOMMU and CPUMMU or
>> > >> different system, because it's difficult to synchronize the IO_ASID
>> > >> when the CPU ASID is rollover.
>> > >> But we could still use hardware broadcast TLB invalidation
>> > >> instruction to uniformly manage the ASID and IO_ASID, or OTHER_ASID in
>> > our IOMMU.
>> > >
>> > > That's probably a bad idea, because you'll likely stall execution on
>> > > the CPU until the IOTLB has completed invalidation. In the case of
>> > > ATS, I think an endpoint ATC is permitted to take over a minute to
>> > > respond. In reality, I suspect the worst you'll ever see would be in
>> > > the msec range, but that's still an unacceptable period of time to hold a
>> > CPU.
>> > >
>> > >> Welcome to join our disscusion:
>> > >> "Introduce an implementation of IOMMU in linux-riscv"
>> > >> 9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V MC
>> > >
>> > > I attended this session, but it unfortunately raised many more
>> > > questions than it answered.
>> >
>> > Ya, we're a long way from figuring this out.
>>
>> For everyone's reference, here is our first attempt at RISC-V ASID allocator:
>> http://archive.lwn.net:8080/linux-kernel/[email protected]/T/#u
>
> With a reply stating that the patch "absolutely does not work" ;)
>
> What exactly do you want people to do with that? It's an awful lot of effort
> to review this sort of stuff and given that Guo Ren is talking about sharing
> page tables between the CPU and an accelerator, maybe you're better off
> stabilising Linux for the platforms that you can actually test rather than
> getting so far ahead of yourselves that you end up with a bunch of wasted
> work on patches that probably won't get merged any time soon.
>
> Seriously, they say "walk before you can run", but this is more "crawl
> before you can fly". What's the rush?
I agree, and I think I've been pretty clear here: we're not merging this ASID
stuff until we have a platform we can test on, particularly as the platforms we
have now already need some wacky hacks around TLB flushing that we haven't
gotten to the bottom of.
> Will
On Sun, Sep 15, 2019 at 05:03:38AM +0000, Anup Patel wrote:
>
>
> > -----Original Message-----
> > From: [email protected] <linux-kernel-
> > [email protected]> On Behalf Of Palmer Dabbelt
> > Sent: Saturday, September 14, 2019 7:31 PM
> > To: [email protected]
> > Cc: [email protected]; Will Deacon <[email protected]>;
> > [email protected]; [email protected]; [email protected];
> > Arnd Bergmann <[email protected]>; [email protected];
> > [email protected]; [email protected]; Anup Patel
> > <[email protected]>; [email protected];
> > [email protected]; Christoph Hellwig <[email protected]>; Atish Patra
> > <[email protected]>; [email protected]; [email protected]; Paul
> > Walmsley <[email protected]>; [email protected]; linux-
> > [email protected]; [email protected]; linux-arm-
> > [email protected]; [email protected]
> > Subject: Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a
> > separate file
> >
> > On Thu, 12 Sep 2019 07:02:56 PDT (-0700), [email protected] wrote:
> > > On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
> > >> On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]> wrote:
> > >> > > I'll keep my system use the same ASID for SMP + IOMMU :P
> > >> >
> > >> > You will want a separate allocator for that:
> > >> >
> > >> > https://lkml.kernel.org/r/20190610184714.6786-2-jean-philippe.bruck
> > >> > [email protected]
> > >>
> > >> Yes, it is hard to maintain ASID between IOMMU and CPUMMU or
> > >> different system, because it's difficult to synchronize the IO_ASID
> > >> when the CPU ASID is rollover.
> > >> But we could still use hardware broadcast TLB invalidation
> > >> instruction to uniformly manage the ASID and IO_ASID, or OTHER_ASID in
> > our IOMMU.
> > >
> > > That's probably a bad idea, because you'll likely stall execution on
> > > the CPU until the IOTLB has completed invalidation. In the case of
> > > ATS, I think an endpoint ATC is permitted to take over a minute to
> > > respond. In reality, I suspect the worst you'll ever see would be in
> > > the msec range, but that's still an unacceptable period of time to hold a
> > CPU.
> > >
> > >> Welcome to join our disscusion:
> > >> "Introduce an implementation of IOMMU in linux-riscv"
> > >> 9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V MC
> > >
> > > I attended this session, but it unfortunately raised many more
> > > questions than it answered.
> >
> > Ya, we're a long way from figuring this out.
>
> For everyone's reference, here is our first attempt at RISC-V ASID allocator:
> http://archive.lwn.net:8080/linux-kernel/[email protected]/T/#u
With a reply stating that the patch "absolutely does not work" ;)
What exactly do you want people to do with that? It's an awful lot of effort
to review this sort of stuff and given that Guo Ren is talking about sharing
page tables between the CPU and an accelerator, maybe you're better off
stabilising Linux for the platforms that you can actually test rather than
getting so far ahead of yourselves that you end up with a bunch of wasted
work on patches that probably won't get merged any time soon.
Seriously, they say "walk before you can run", but this is more "crawl
before you can fly". What's the rush?
Will
> -----Original Message-----
> From: [email protected] <linux-kernel-
> [email protected]> On Behalf Of Will Deacon
> Sent: Monday, September 16, 2019 11:48 PM
> To: Anup Patel <[email protected]>
> Cc: Palmer Dabbelt <[email protected]>; [email protected]; Will Deacon
> <[email protected]>; [email protected]; [email protected];
> [email protected]; Arnd Bergmann <[email protected]>;
> [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; Christoph Hellwig <[email protected]>; Atish Patra
> <[email protected]>; [email protected]; [email protected]; Paul
> Walmsley <[email protected]>; [email protected]; linux-
> [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]
> Subject: Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a
> separate file
>
> On Sun, Sep 15, 2019 at 05:03:38AM +0000, Anup Patel wrote:
> >
> >
> > > -----Original Message-----
> > > From: [email protected] <linux-kernel-
> > > [email protected]> On Behalf Of Palmer Dabbelt
> > > Sent: Saturday, September 14, 2019 7:31 PM
> > > To: [email protected]
> > > Cc: [email protected]; Will Deacon <[email protected]>;
> > > [email protected]; [email protected];
> [email protected];
> > > Arnd Bergmann <[email protected]>; [email protected];
> > > [email protected]; [email protected]; Anup Patel
> > > <[email protected]>; [email protected];
> > > [email protected]; Christoph Hellwig <[email protected]>; Atish
> > > Patra <[email protected]>; [email protected]; [email protected];
> > > Paul Walmsley <[email protected]>; [email protected];
> > > linux- [email protected]; [email protected];
> > > linux-arm- [email protected];
> > > [email protected]
> > > Subject: Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code
> > > in a separate file
> > >
> > > On Thu, 12 Sep 2019 07:02:56 PDT (-0700), [email protected] wrote:
> > > > On Sun, Sep 08, 2019 at 07:52:55AM +0800, Guo Ren wrote:
> > > >> On Mon, Jun 24, 2019 at 6:40 PM Will Deacon <[email protected]>
> wrote:
> > > >> > > I'll keep my system use the same ASID for SMP + IOMMU :P
> > > >> >
> > > >> > You will want a separate allocator for that:
> > > >> >
> > > >> > https://lkml.kernel.org/r/20190610184714.6786-2-jean-philippe.b
> > > >> > ruck
> > > >> > [email protected]
> > > >>
> > > >> Yes, it is hard to maintain ASID between IOMMU and CPUMMU or
> > > >> different system, because it's difficult to synchronize the
> > > >> IO_ASID when the CPU ASID is rollover.
> > > >> But we could still use hardware broadcast TLB invalidation
> > > >> instruction to uniformly manage the ASID and IO_ASID, or
> > > >> OTHER_ASID in
> > > our IOMMU.
> > > >
> > > > That's probably a bad idea, because you'll likely stall execution
> > > > on the CPU until the IOTLB has completed invalidation. In the case
> > > > of ATS, I think an endpoint ATC is permitted to take over a minute
> > > > to respond. In reality, I suspect the worst you'll ever see would
> > > > be in the msec range, but that's still an unacceptable period of
> > > > time to hold a
> > > CPU.
> > > >
> > > >> Welcome to join our disscusion:
> > > >> "Introduce an implementation of IOMMU in linux-riscv"
> > > >> 9 Sep 2019, 10:45 Jade-room-I&II (Corinthia Hotel Lisbon) RISC-V
> > > >> MC
> > > >
> > > > I attended this session, but it unfortunately raised many more
> > > > questions than it answered.
> > >
> > > Ya, we're a long way from figuring this out.
> >
> > For everyone's reference, here is our first attempt at RISC-V ASID allocator:
> > http://archive.lwn.net:8080/linux-kernel/20190329045111.14040-1-anup.p
> > [email protected]/T/#u
>
> With a reply stating that the patch "absolutely does not work" ;)
This patch was tested on existing HW (which does not have ASID implementation)
and tested on QEMU (which has very simplistic Implementation of ASID).
When I asked Gary Guo about way to get access to their HW (in same patch
email thread), I did not get any reply. After so many months passed, I now
doubt the his comment "absolutely does not work".
>
> What exactly do you want people to do with that? It's an awful lot of effort to
> review this sort of stuff and given that Guo Ren is talking about sharing page
> tables between the CPU and an accelerator, maybe you're better off
> stabilising Linux for the platforms that you can actually test rather than
> getting so far ahead of yourselves that you end up with a bunch of wasted
> work on patches that probably won't get merged any time soon.
The intention of the ASID patch was to encourage RISC-V implementations
having ASID in HW and also ensure that things don't break on existing HW.
I don't see our efforts being wasted in trying to make Linux RISC-V feature
complete and encouraging more feature rich RISC-V CPUs.
Delays in merging patches are fine as long as people have something to try
on their RISC-V CPU implementations.
>
> Seriously, they say "walk before you can run", but this is more "crawl before
> you can fly". What's the rush?
>
> Will
Regards,
Anup
Hi,
On Mon, Sep 16, 2019 at 8:57 PM Jean-Philippe Brucker
<[email protected]> wrote:
> On 13/09/2019 09:13, Guo Ren wrote:
> > Another idea is seperate remote TLB invalidate into two instructions:
> >
> > - sfence.vma.b.asyc
> > - sfence.vma.b.barrier // wait all async TLB invalidate operations
> > finished for all harts.
>
> It's not clear to me how this helps, but I probably don't have the whole
> picture. If you have a place where it is safe to wait for the barrier to
> complete, why not do the whole invalidate there?
>
> > (I remember who mentioned me separate them into two instructions after
> > session. Anup? Is the idea right ?)
Forget it, I still use irq signal in my formal proposal [1]. I also
couldn't image the whole picture :P
> > To solve the problem, we could define a async mode in sfence.vma.b to
> > slove the problem and finished with per_cpu_irq/exception.
>
> The solution I had to this problem is pinning the ASID [1] used by the
> IOMMU, to prevent the CPU from recycling the ASID on rollover. This way
> the CPU doesn't have to wait for IOMMU invalidations to complete, when
> scheduling a task that might not even have anything to do with the IOMMU.
>
> In the Arm SMMU, ASID and IOASID (PASID) are separate identifiers. IOASID
> indexes an entry in the context descriptor table, which contains the ASID.
> So with unpinned shared ASID you don't need to invalidate the ATC on
> rollover, since the IOASID doesn't change, but you do need to modify the
> context descriptor and invalidate cached versions of it.
The terminology confused me a lot. I perfer use PASID for IOMMU and
ASID is for CPU.
Arm's entry of the context descriptor table contains a "IOASID"
IOASID != ASID for CPU_TLB and IOMMU_TLB.
When you say "since the IOASID doesn't change",Is it PASID or my IOASID ? -_*!
PASID in PCI-sig was used to determine transfer address space.
For intel, the entry which is indexed by PASID also contain S1/S2.PGD
and DID(VMID).
For arm, the entry which is indexed by PASID only contain S1.PGD and
IOASID. Compare to Intel Vt-d Scalable mode, arm's design can't
support PCI Virtual Function.
>
> Once you have pinned ASIDs, you could also declare that IOASID = ASID. I
> don't remember finding an argument to strictly forbid it, even though ASID
> and IOASID have different sizes on Arm (respectively 8/16 and 20 bits).
ASID and IOASID are hard to keep the same between CPU system and IOMMU
system. So I introduce S1/S2.PGD.PPN as a bridge between CPUs and
IOMMUs.
See my proposal [1]
1: https://lore.kernel.org/linux-csky/[email protected]/T/#u
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
Hi,
On Tue, Sep 17, 2019 at 11:42 AM Anup Patel <[email protected]> wrote:
> >
> > With a reply stating that the patch "absolutely does not work" ;)
>
> This patch was tested on existing HW (which does not have ASID implementation)
> and tested on QEMU (which has very simplistic Implementation of ASID).
>
> When I asked Gary Guo about way to get access to their HW (in same patch
> email thread), I did not get any reply. After so many months passed, I now
> doubt the his comment "absolutely does not work".
> >
> > What exactly do you want people to do with that? It's an awful lot of effort to
> > review this sort of stuff and given that Guo Ren is talking about sharing page
> > tables between the CPU and an accelerator, maybe you're better off
> > stabilising Linux for the platforms that you can actually test rather than
> > getting so far ahead of yourselves that you end up with a bunch of wasted
> > work on patches that probably won't get merged any time soon.
>
> The intention of the ASID patch was to encourage RISC-V implementations
> having ASID in HW and also ensure that things don't break on existing HW.
>
> I don't see our efforts being wasted in trying to make Linux RISC-V feature
> complete and encouraging more feature rich RISC-V CPUs.
>
> Delays in merging patches are fine as long as people have something to try
> on their RISC-V CPU implementations.
>
I'm the supporter of that patch:
http://archive.lwn.net:8080/linux-kernel/[email protected]/T/#u
Because it implicit hw broadcast tlb invalidation optimization.
Honestly it's not suitable for remote tlb flush with software IPI, but
it's still much better than current RISC-V's.
I'll try it on our hardware: 910. wait a moment :)
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/
On Thu, Sep 19, 2019 at 09:07:15PM +0800, Guo Ren wrote:
> > The solution I had to this problem is pinning the ASID [1] used by the
> > IOMMU, to prevent the CPU from recycling the ASID on rollover. This way
> > the CPU doesn't have to wait for IOMMU invalidations to complete, when
> > scheduling a task that might not even have anything to do with the IOMMU.
> >
>
> > In the Arm SMMU, ASID and IOASID (PASID) are separate identifiers. IOASID
> > indexes an entry in the context descriptor table, which contains the ASID.
> > So with unpinned shared ASID you don't need to invalidate the ATC on
> > rollover, since the IOASID doesn't change, but you do need to modify the
> > context descriptor and invalidate cached versions of it.
> The terminology confused me a lot. I perfer use PASID for IOMMU and
> ASID is for CPU.
> Arm's entry of the context descriptor table contains a "IOASID"
The terminology I've been using so far is different:
* IOASID is PASID
* The entry in the context descriptor table contains an ASID, which
is either "shared" with CPUs or "private" to the SMMU (the SMMU spec
says "shared" or "non-shared").
* So the CPU and SMMU TLBs use ASIDs, and the PCI ATC uses IOASID
> IOASID != ASID for CPU_TLB and IOMMU_TLB.
>
> When you say "since the IOASID doesn't change",Is it PASID or my IOASID ? -_*!
I was talking about PASID. Maybe we can drop "IOASID" and talk only
about ASID and PASID :)
> PASID in PCI-sig was used to determine transfer address space.
> For intel, the entry which is indexed by PASID also contain S1/S2.PGD
> and DID(VMID).
> For arm, the entry which is indexed by PASID only contain S1.PGD and
> IOASID. Compare to Intel Vt-d Scalable mode, arm's design can't
> support PCI Virtual Function.
The SMMU does support PCI Virtual Function - an hypervisor can assign a
VF to a guest, and let that guest partition the VF into smaller contexts
by using PASID. What it can't support is assigning partitions of a PCI
function (VF or PF) to multiple Virtual Machines, since there is a
single S2 PGD per function (in the Stream Table Entry), rather than one
S2 PGD per PASID context.
Thanks,
Jean
> > Once you have pinned ASIDs, you could also declare that IOASID = ASID. I
> > don't remember finding an argument to strictly forbid it, even though ASID
> > and IOASID have different sizes on Arm (respectively 8/16 and 20 bits).
> ASID and IOASID are hard to keep the same between CPU system and IOMMU
> system. So I introduce S1/S2.PGD.PPN as a bridge between CPUs and
> IOMMUs.
> See my proposal [1]
>
> 1: https://lore.kernel.org/linux-csky/[email protected]/T/#u
> --
> Best Regards
> Guo Ren
>
> ML: https://lore.kernel.org/linux-csky/
On Fri, Sep 20, 2019 at 08:07:38AM +0800, Guo Ren wrote:
> On Thu, Sep 19, 2019 at 11:18 PM Jean-Philippe Brucker
> <[email protected]> wrote:
>
> >
> > The SMMU does support PCI Virtual Function - an hypervisor can assign a
> > VF to a guest, and let that guest partition the VF into smaller contexts
> > by using PASID. What it can't support is assigning partitions of a PCI
> > function (VF or PF) to multiple Virtual Machines, since there is a
> > single S2 PGD per function (in the Stream Table Entry), rather than one
> > S2 PGD per PASID context.
> >
> In my concept, the two sentences "The SMMU does support PCI Virtual
> Functio" v.s. "What it can't support is assigning partitions of a PCI
> function (VF or PF) to multiple Virtual Machines" are conflict and I
> don't want to play naming game :)
That's fine. But to prevent the spread of misinformation: Arm SMMU
supports PCI Virtual Functions.
Thanks,
Jean
On Thu, Sep 19, 2019 at 11:18 PM Jean-Philippe Brucker
<[email protected]> wrote:
>
> The SMMU does support PCI Virtual Function - an hypervisor can assign a
> VF to a guest, and let that guest partition the VF into smaller contexts
> by using PASID. What it can't support is assigning partitions of a PCI
> function (VF or PF) to multiple Virtual Machines, since there is a
> single S2 PGD per function (in the Stream Table Entry), rather than one
> S2 PGD per PASID context.
>
In my concept, the two sentences "The SMMU does support PCI Virtual
Functio" v.s. "What it can't support is assigning partitions of a PCI
function (VF or PF) to multiple Virtual Machines" are conflict and I
don't want to play naming game :)
--
Best Regards
Guo Ren
ML: https://lore.kernel.org/linux-csky/