LinuxLists.cc - [PATCH v6 0/7] KVM: arm64: Normalize cache configuration

2023-01-07 09:55:42

Subject: [PATCH v6 0/7] KVM: arm64: Normalize cache configuration

2023-01-07 09:56:33

Subject: [PATCH v6 6/7] KVM: arm64: Mask FEAT_CCIDX

The CCSIDR access handler masks the associativity bits according to the
bit layout for processors without FEAT_CCIDX. KVM also assumes CCSIDR is
32-bit where it will be 64-bit if FEAT_CCIDX is enabled. Mask FEAT_CCIDX
so that these assumptions hold.

Suggested-by: Marc Zyngier <[email protected]>
Signed-off-by: Akihiko Odaki <[email protected]>
---
arch/arm64/kvm/sys_regs.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d5ee52d6bf73..5617de916c80 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1155,6 +1155,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
pmuver_to_perfmon(vcpu_pmuver(vcpu)));
break;
+ case SYS_ID_AA64MMFR2_EL1:
+ val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
+ break;
+ case SYS_ID_MMFR4_EL1:
+ val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
+ break;
}

return val;
@@ -1718,6 +1724,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {

{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
{ SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+ { SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
@@ -2219,6 +2226,10 @@ static const struct sys_reg_desc cp15_regs[] = {

{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
+
+ /* CCSIDR2 */
+ { Op1(1), CRn( 0), CRm( 0), Op2(2), undef_access },
+
{ Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 },
};

--
2.38.1

2023-01-07 10:16:13

by Akihiko Odaki

[permalink] [raw]

Subject: [PATCH v6 7/7] KVM: arm64: Normalize cache configuration

Before this change, the cache configuration of the physical CPU was
exposed to vcpus. This is problematic because the cache configuration a
vcpu sees varies when it migrates between vcpus with different cache
configurations.

Fabricate cache configuration from the sanitized value, which holds the
CTR_EL0 value the userspace sees regardless of which physical CPU it
resides on.

CLIDR_EL1 and CCSIDR_EL1 are now writable from the userspace so that
the VMM can restore the values saved with the old kernel.

Suggested-by: Marc Zyngier <[email protected]>
Signed-off-by: Akihiko Odaki <[email protected]>
---
arch/arm64/include/asm/cache.h | 3 +
arch/arm64/include/asm/kvm_host.h | 4 +
arch/arm64/kvm/reset.c | 1 +
arch/arm64/kvm/sys_regs.c | 252 ++++++++++++++++++------------
4 files changed, 164 insertions(+), 96 deletions(-)

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ab7133654a72..a51e6e8f3171 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -22,6 +22,9 @@
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))

+/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
+#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
+
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 374390a9212e..496602e0b299 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -252,6 +252,7 @@ struct kvm_vcpu_fault_info {
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
+ CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
SCTLR_EL1, /* System Control Register */
ACTLR_EL1, /* Auxiliary Control Register */
@@ -501,6 +502,9 @@ struct kvm_vcpu_arch {
u64 last_steal;
gpa_t base;
} steal;
+
+ /* Per-vcpu CCSIDR override or NULL */
+ u32 *ccsidr;
};

/*
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e0267f672b8a..dc235ddc6172 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -157,6 +157,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
+ kfree(vcpu->arch.ccsidr);
}

static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5617de916c80..e789f9dea277 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -11,6 +11,7 @@

#include <linux/bitfield.h>
#include <linux/bsearch.h>
+#include <linux/cacheinfo.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/printk.h>
@@ -81,25 +82,85 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
__vcpu_sys_reg(vcpu, reg) = val;
}

-/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
-static u32 cache_levels;
-
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 14

+static u8 get_min_cache_line_size(u32 csselr)
+{
+ u64 ctr_el0;
+ int field;
+
+ ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ field = csselr & CSSELR_EL1_InD ? CTR_EL0_IminLine_SHIFT : CTR_EL0_DminLine_SHIFT;
+
+ /*
+ * Determine Log2(Number of bytes) - 4, which is the encoding of cache
+ * line size in CCSIDR_EL0. In CTR_EL0, the cache line size is
+ * represented with:
+ * Log2(Number of words) = Log2((Number of bytes) / 4)
+ * = Log2(Number of bytes) - 2
+ */
+ return cpuid_feature_extract_unsigned_field(ctr_el0, field) - 2;
+}
+
/* Which cache CCSIDR represents depends on CSSELR value. */
-static u32 get_ccsidr(u32 csselr)
+static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
{
- u32 ccsidr;
+ if (vcpu->arch.ccsidr)
+ return vcpu->arch.ccsidr[csselr];

- /* Make sure noone else changes CSSELR during this! */
- local_irq_disable();
- write_sysreg(csselr, csselr_el1);
- isb();
- ccsidr = read_sysreg(ccsidr_el1);
- local_irq_enable();
+ /*
+ * Fabricate a CCSIDR value as the overriding value does not exist.
+ * The real CCSIDR value will not be used as it can vary by the
+ * physical CPU which the vcpu currently resides in.
+ *
+ * The line size is determined with get_min_cache_line_size(), which
+ * should be valid for all CPUs even if they have different cache
+ * configuration.
+ *
+ * The associativity bits are cleared, meaning the geometry of all data
+ * and unified caches (which are guaranteed to be PIPT and thus
+ * non-aliasing) are 1 set and 1 way.
+ * Guests should not be doing cache operations by set/way at all, and
+ * for this reason, we trap them and attempt to infer the intent, so
+ * that we can flush the entire guest's address space at the appropriate
+ * time. The exposed geometry minimizes the number of the traps.
+ * [If guests should attempt to infer aliasing properties from the
+ * geometry (which is not permitted by the architecture), they would
+ * only do so for virtually indexed caches.]
+ *
+ * We don't check if the cache level exists as it is allowed to return
+ * an UNKNOWN value if not.
+ */
+ return get_min_cache_line_size(csselr) << CCSIDR_EL1_LineSize_SHIFT;
+}

- return ccsidr;
+static int set_ccsidr(struct kvm_vcpu *vcpu, u32 csselr, u32 val)
+{
+ u8 line_size = FIELD_GET(CCSIDR_EL1_LineSize, val);
+ u32 *ccsidr = vcpu->arch.ccsidr;
+ u32 i;
+
+ if ((val & CCSIDR_EL1_RES0) || line_size < get_min_cache_line_size(csselr))
+ return -EINVAL;
+
+ if (!ccsidr) {
+ if (val == get_ccsidr(vcpu, csselr))
+ return 0;
+
+ ccsidr = kmalloc_array(CSSELR_MAX, sizeof(u32), GFP_KERNEL);
+ if (!ccsidr)
+ return -ENOMEM;
+
+ for (i = 0; i < CSSELR_MAX; i++)
+ ccsidr[i] = get_ccsidr(vcpu, i);
+
+ vcpu->arch.ccsidr = ccsidr;
+ }
+
+ ccsidr[csselr] = val;
+
+ return 0;
}

/*
@@ -1391,10 +1452,78 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write)
return write_to_read_only(vcpu, p, r);

- p->regval = read_sysreg(clidr_el1);
+ p->regval = __vcpu_sys_reg(vcpu, r->reg);
return true;
}

+/*
+ * Fabricate a CLIDR_EL1 value instead of using the real value, which can vary
+ * by the physical CPU which the vcpu currently resides in.
+ */
+static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ u64 clidr;
+ u8 loc;
+
+ if ((ctr_el0 & CTR_EL0_IDC)) {
+ /*
+ * Data cache clean to the PoU is not required so LoUU and LoUIS
+ * will not be set and a unified cache, which will be marked as
+ * LoC, will be added.
+ *
+ * If not DIC, let the unified cache L2 so that an instruction
+ * cache can be added as L1 later.
+ */
+ loc = (ctr_el0 & CTR_EL0_DIC) ? 1 : 2;
+ clidr = CACHE_TYPE_UNIFIED << CLIDR_CTYPE_SHIFT(loc);
+ } else {
+ /*
+ * Data cache clean to the PoU is required so let L1 have a data
+ * cache and mark it as LoUU and LoUIS. As L1 has a data cache,
+ * it can be marked as LoC too.
+ */
+ loc = 1;
+ clidr = 1 << CLIDR_LOUU_SHIFT;
+ clidr |= 1 << CLIDR_LOUIS_SHIFT;
+ clidr |= CACHE_TYPE_DATA << CLIDR_CTYPE_SHIFT(1);
+ }
+
+ /*
+ * Instruction cache invalidation to the PoU is required so let L1 have
+ * an instruction cache. If L1 already has a data cache, it will be
+ * CACHE_TYPE_SEPARATE.
+ */
+ if (!(ctr_el0 & CTR_EL0_DIC))
+ clidr |= CACHE_TYPE_INST << CLIDR_CTYPE_SHIFT(1);
+
+ clidr |= loc << CLIDR_LOC_SHIFT;
+
+ /*
+ * Add tag cache unified to data cache. Allocation tags and data are
+ * unified in a cache line so that it looks valid even if there is only
+ * one cache line.
+ */
+ if (kvm_has_mte(vcpu->kvm))
+ clidr |= 2 << CLIDR_TTYPE_SHIFT(loc);
+
+ __vcpu_sys_reg(vcpu, r->reg) = clidr;
+}
+
+static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
+{
+ u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val));
+
+ if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+ return -EINVAL;
+
+ __vcpu_sys_reg(vcpu, rd->reg) = val;
+
+ return 0;
+}
+
static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1416,22 +1545,10 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return write_to_read_only(vcpu, p, r);

csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
- p->regval = get_ccsidr(csselr);
+ csselr &= CSSELR_EL1_Level | CSSELR_EL1_InD;
+ if (csselr < CSSELR_MAX)
+ p->regval = get_ccsidr(vcpu, csselr);

- /*
- * Guests should not be doing cache operations by set/way at all, and
- * for this reason, we trap them and attempt to infer the intent, so
- * that we can flush the entire guest's address space at the appropriate
- * time.
- * To prevent this trapping from causing performance problems, let's
- * expose the geometry of all data and unified caches (which are
- * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way.
- * [If guests should attempt to infer aliasing properties from the
- * geometry (which is not permitted by the architecture), they would
- * only do so for virtually indexed caches.]
- */
- if (!(csselr & 1)) // data or unified cache
- p->regval &= ~GENMASK(27, 3);
return true;
}

@@ -1723,7 +1840,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},

{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
- { SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+ { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
+ .set_user = set_clidr },
{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
@@ -2735,7 +2853,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,

FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(clidr_el1)
FUNCTION_INVARIANT(aidr_el1)

static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
@@ -2747,7 +2864,6 @@ static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
static struct sys_reg_desc invariant_sys_regs[] = {
{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
- { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
};
@@ -2784,33 +2900,7 @@ static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
return 0;
}

-static bool is_valid_cache(u32 val)
-{
- u32 level, ctype;
-
- if (val >= CSSELR_MAX)
- return false;
-
- /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
- level = (val >> 1);
- ctype = (cache_levels >> (level * 3)) & 7;
-
- switch (ctype) {
- case 0: /* No cache */
- return false;
- case 1: /* Instruction cache only */
- return (val & 1);
- case 2: /* Data cache only */
- case 4: /* Unified cache */
- return !(val & 1);
- case 3: /* Separate instruction and data caches */
- return true;
- default: /* Reserved: we can't know instruction or data. */
- return false;
- }
-}
-
-static int demux_c15_get(u64 id, void __user *uaddr)
+static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val;
u32 __user *uval = uaddr;
@@ -2826,16 +2916,16 @@ static int demux_c15_get(u64 id, void __user *uaddr)
return -ENOENT;
val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
+ if (val >= CSSELR_MAX)
return -ENOENT;

- return put_user(get_ccsidr(val), uval);
+ return put_user(get_ccsidr(vcpu, val), uval);
default:
return -ENOENT;
}
}

-static int demux_c15_set(u64 id, void __user *uaddr)
+static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val, newval;
u32 __user *uval = uaddr;
@@ -2851,16 +2941,13 @@ static int demux_c15_set(u64 id, void __user *uaddr)
return -ENOENT;
val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
+ if (val >= CSSELR_MAX)
return -ENOENT;

if (get_user(newval, uval))
return -EFAULT;

- /* This is also invariant: you can't change it. */
- if (newval != get_ccsidr(val))
- return -EINVAL;
- return 0;
+ return set_ccsidr(vcpu, val, newval);
default:
return -ENOENT;
}
@@ -2897,7 +2984,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
int err;

if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_get(reg->id, uaddr);
+ return demux_c15_get(vcpu, reg->id, uaddr);

err = get_invariant_sys_reg(reg->id, uaddr);
if (err != -ENOENT)
@@ -2941,7 +3028,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
int err;

if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_set(reg->id, uaddr);
+ return demux_c15_set(vcpu, reg->id, uaddr);

err = set_invariant_sys_reg(reg->id, uaddr);
if (err != -ENOENT)
@@ -2953,13 +3040,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg

static unsigned int num_demux_regs(void)
{
- unsigned int i, count = 0;
-
- for (i = 0; i < CSSELR_MAX; i++)
- if (is_valid_cache(i))
- count++;
-
- return count;
+ return CSSELR_MAX;
}

static int write_demux_regids(u64 __user *uindices)
@@ -2969,8 +3050,6 @@ static int write_demux_regids(u64 __user *uindices)

val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
for (i = 0; i < CSSELR_MAX; i++) {
- if (!is_valid_cache(i))
- continue;
if (put_user(val | i, uindices))
return -EFAULT;
uindices++;
@@ -3072,7 +3151,6 @@ int kvm_sys_reg_table_init(void)
{
bool valid = true;
unsigned int i;
- struct sys_reg_desc clidr;

/* Make sure tables are unique and in order. */
valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -3089,23 +3167,5 @@ int kvm_sys_reg_table_init(void)
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);

- /*
- * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
- *
- * If software reads the Cache Type fields from Ctype1
- * upwards, once it has seen a value of 0b000, no caches
- * exist at further-out levels of the hierarchy. So, for
- * example, if Ctype3 is the first Cache Type field with a
- * value of 0b000, the values of Ctype4 to Ctype7 must be
- * ignored.
- */
- get_clidr_el1(NULL, &clidr); /* Ugly... */
- cache_levels = clidr.val;
- for (i = 0; i < 7; i++)
- if (((cache_levels >> (i*3)) & 7) == 0)
- break;
- /* Clear all higher bits. */
- cache_levels &= (1 << (i*3))-1;
-
return 0;
}
--
2.38.1

2023-01-07 10:45:46

by Akihiko Odaki

[permalink] [raw]

Subject: [PATCH v6 2/7] arm64/sysreg: Convert CCSIDR_EL1 to automatic generation

Convert CCSIDR_EL1 to automatic generation as per DDI0487I.a.

Signed-off-by: Akihiko Odaki <[email protected]>
Reviewed-by: Mark Brown <[email protected]>
---
arch/arm64/include/asm/sysreg.h | 1 -
arch/arm64/tools/sysreg | 10 ++++++++++
2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1312fb48f18b..a2a93b3fc557 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -404,7 +404,6 @@

#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)

-#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)

#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index f754265aec5f..45648fa89be8 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1637,6 +1637,16 @@ Sysreg SCXTNUM_EL1 3 0 13 0 7
Field 63:0 SoftwareContextNumber
EndSysreg

+# The bit layout for CCSIDR_EL1 depends on whether FEAT_CCIDX is implemented.
+# The following is for case when FEAT_CCIDX is not implemented.
+Sysreg CCSIDR_EL1 3 1 0 0 0
+Res0 63:32
+Unkn 31:28
+Field 27:13 NumSets
+Field 12:3 Associativity
+Field 2:0 LineSize
+EndSysreg
+
Sysreg CLIDR_EL1 3 1 0 0 1
Res0 63:47
Field 46:33 Ttypen
--
2.38.1

2023-01-11 19:16:58

by Oliver Upton

[permalink] [raw]

Subject: Re: [PATCH v6 7/7] KVM: arm64: Normalize cache configuration

On Sat, Jan 07, 2023 at 06:46:29PM +0900, Akihiko Odaki wrote:
> Before this change, the cache configuration of the physical CPU was
> exposed to vcpus. This is problematic because the cache configuration a
> vcpu sees varies when it migrates between vcpus with different cache
> configurations.
>
> Fabricate cache configuration from the sanitized value, which holds the
> CTR_EL0 value the userspace sees regardless of which physical CPU it
> resides on.
>
> CLIDR_EL1 and CCSIDR_EL1 are now writable from the userspace so that
> the VMM can restore the values saved with the old kernel.
>
> Suggested-by: Marc Zyngier <[email protected]>
> Signed-off-by: Akihiko Odaki <[email protected]>
> ---
> arch/arm64/include/asm/cache.h | 3 +
> arch/arm64/include/asm/kvm_host.h | 4 +
> arch/arm64/kvm/reset.c | 1 +
> arch/arm64/kvm/sys_regs.c | 252 ++++++++++++++++++------------
> 4 files changed, 164 insertions(+), 96 deletions(-)
>
> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
> index ab7133654a72..a51e6e8f3171 100644
> --- a/arch/arm64/include/asm/cache.h
> +++ b/arch/arm64/include/asm/cache.h
> @@ -22,6 +22,9 @@
> #define CLIDR_CTYPE(clidr, level) \
> (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
>
> +/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
> +#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
> +
> /*
> * Memory returned by kmalloc() may be used for DMA, so we must make
> * sure that all such allocations are cache aligned. Otherwise,
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 374390a9212e..496602e0b299 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -252,6 +252,7 @@ struct kvm_vcpu_fault_info {
> enum vcpu_sysreg {
> __INVALID_SYSREG__, /* 0 is reserved as an invalid value */
> MPIDR_EL1, /* MultiProcessor Affinity Register */
> + CLIDR_EL1, /* Cache Level ID Register */
> CSSELR_EL1, /* Cache Size Selection Register */
> SCTLR_EL1, /* System Control Register */
> ACTLR_EL1, /* Auxiliary Control Register */
> @@ -501,6 +502,9 @@ struct kvm_vcpu_arch {
> u64 last_steal;
> gpa_t base;
> } steal;
> +
> + /* Per-vcpu CCSIDR override or NULL */
> + u32 *ccsidr;
> };
>
> /*
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e0267f672b8a..dc235ddc6172 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -157,6 +157,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
> if (sve_state)
> kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
> kfree(sve_state);
> + kfree(vcpu->arch.ccsidr);
> }
>
> static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 5617de916c80..e789f9dea277 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -11,6 +11,7 @@
>
> #include <linux/bitfield.h>
> #include <linux/bsearch.h>
> +#include <linux/cacheinfo.h>
> #include <linux/kvm_host.h>
> #include <linux/mm.h>
> #include <linux/printk.h>
> @@ -81,25 +82,85 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
> __vcpu_sys_reg(vcpu, reg) = val;
> }
>
> -/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
> -static u32 cache_levels;
> -
> /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
> #define CSSELR_MAX 14
>
> +static u8 get_min_cache_line_size(u32 csselr)
> +{
> + u64 ctr_el0;
> + int field;
> +
> + ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> + field = csselr & CSSELR_EL1_InD ? CTR_EL0_IminLine_SHIFT : CTR_EL0_DminLine_SHIFT;
> +
> + /*
> + * Determine Log2(Number of bytes) - 4, which is the encoding of cache
> + * line size in CCSIDR_EL0. In CTR_EL0, the cache line size is
> + * represented with:
> + * Log2(Number of words) = Log2((Number of bytes) / 4)
> + * = Log2(Number of bytes) - 2
> + */
> + return cpuid_feature_extract_unsigned_field(ctr_el0, field) - 2;
> +}

So I definitely got my math wrong when I was reading this the first
time, apologies.

Nonetheless, I still find the return value confusing here. It would be
better to just return Log2(bytes) outright (i.e. no offset) and document
that. I worry that the next user of this function will miss that detail.

While at it we should probably convert to the new sysreg field helpers
too.

/*
* Returns the minimum line size for the selected cache, expressed as
* Log2(bytes).
*/
static u8 get_min_cache_line_size(bool icache)
{
u64 ctr = read_sanitised_ftr_reg(SYS_CTR_EL0);
u8 field;

if (icache)
field = SYS_FIELD_GET(CTR_EL0, IminSize, ctr);
else
field = SYS_FIELD_GET(CTR_EL0, DminSize, ctr);

/*
* Cache line size is represented as Log2(words) in CTR_EL0.
* Log2(bytes) can be derived with the following:
*
* Log2(words) + 2 = Log2(bytes / 4) + 2
* = Log2(bytes) - 2 + 2
* = Log2(bytes)
*/
return field + 2;
}

> +
> /* Which cache CCSIDR represents depends on CSSELR value. */
> -static u32 get_ccsidr(u32 csselr)
> +static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
> {
> - u32 ccsidr;
> + if (vcpu->arch.ccsidr)
> + return vcpu->arch.ccsidr[csselr];
>
> - /* Make sure noone else changes CSSELR during this! */
> - local_irq_disable();
> - write_sysreg(csselr, csselr_el1);
> - isb();
> - ccsidr = read_sysreg(ccsidr_el1);
> - local_irq_enable();
> + /*
> + * Fabricate a CCSIDR value as the overriding value does not exist.
> + * The real CCSIDR value will not be used as it can vary by the
> + * physical CPU which the vcpu currently resides in.
> + *
> + * The line size is determined with get_min_cache_line_size(), which
> + * should be valid for all CPUs even if they have different cache
> + * configuration.
> + *
> + * The associativity bits are cleared, meaning the geometry of all data
> + * and unified caches (which are guaranteed to be PIPT and thus
> + * non-aliasing) are 1 set and 1 way.
> + * Guests should not be doing cache operations by set/way at all, and
> + * for this reason, we trap them and attempt to infer the intent, so
> + * that we can flush the entire guest's address space at the appropriate
> + * time. The exposed geometry minimizes the number of the traps.
> + * [If guests should attempt to infer aliasing properties from the
> + * geometry (which is not permitted by the architecture), they would
> + * only do so for virtually indexed caches.]
> + *
> + * We don't check if the cache level exists as it is allowed to return
> + * an UNKNOWN value if not.
> + */
> + return get_min_cache_line_size(csselr) << CCSIDR_EL1_LineSize_SHIFT;

So with the above change, this would become:

u8 line_size = get_min_cache_line_size(csselr & CSSELR_EL1_InD);

return SYS_FIELD_PREP(CSSELR_EL1, LineSize, line_size - 4);

Which I find slightly more readable because it moves the -4 offset to
where the relevant field is initialized. Adding an extra bit of
information to your comment explaining the offset is likely worthwhile
too.

--
Thanks,
Oliver