2024-05-14 07:23:56

by Sebastian Ott

[permalink] [raw]
Subject: [PATCH v3 0/6] KVM: arm64: emulation for CTR_EL0

Hej folks,

I'm looking into supporting migration between 2 Ampere Altra (Max)
machines (using Neoverse-N1). They are almost identical regarding
their feature id register state except for CTR_EL0.DIC which is set
on one machine but not the other.

CTR_EL0 is currently marked as invariant and migrating a VM between
those 2 machines using qemu fails.

Changes RFC [0] -> V1 [1]:
* store the emulated value per VM and not per VCPU
* allow to change more values than just the DIC bit
* only trap guest access to that reg when needed
* make sure to not present the guest with an inconsistent register set
Changes V1 -> V2 [2]:
* implemented Marc's suggestion for keeping registers consistent while
not breaking userspace ABI / expectations (I hope correctly this time)
* keep the shadowed value valid at all times
* unify the code to setup traps
Changes V2 -> V3:
* rebased to kvm-arm-next (to include Oliver's idreg fixes)
* fixed VM ops trapping for non-FWB CPUs
* fixed writable mask for CLIDR_EL1
* re-added manual ctr validation (using arm64_check_features() had a
side effect with the way .reset is working for these registers)
* added a testcase

Thanks,
Sebastian

[0]: https://lore.kernel.org/all/[email protected]/T/
[1]: https://lore.kernel.org/lkml/[email protected]/T/
[2]: https://lore.kernel.org/lkml/[email protected]/T/

Sebastian Ott (6):
KVM: arm64: unify code to prepare traps
KVM: arm64: maintain per VM value for CTR_EL0
KVM: arm64: add emulation for CTR_EL0 register
KVM: arm64: show writable masks for feature registers
KVM: arm64: rename functions for invariant sys regs
KVM: selftests: arm64: Test writes to CTR_EL0

arch/arm64/include/asm/kvm_emulate.h | 40 +---
arch/arm64/include/asm/kvm_host.h | 4 +-
arch/arm64/kvm/arm.c | 2 +-
arch/arm64/kvm/sys_regs.c | 210 ++++++++++++++----
.../selftests/kvm/aarch64/set_id_regs.c | 16 ++
5 files changed, 197 insertions(+), 75 deletions(-)

--
2.42.0



2024-05-14 07:29:18

by Sebastian Ott

[permalink] [raw]
Subject: [PATCH v3 3/6] KVM: arm64: add emulation for CTR_EL0 register

CTR_EL0 is currently handled as an invariant register, thus
guests will be presented with the host value of that register.

Add emulation for CTR_EL0 based on a per VM value. Userspace can
switch off DIC and IDC bits and reduce DminLine and IminLine sizes.

When CTR_EL0 is changed validate that against CLIDR_EL1 and CCSIDR_EL1
to make sure we present the guest with consistent register values.
Changes that affect the generated cache topology values are allowed if
they don't clash with previous register writes.

Signed-off-by: Sebastian Ott <[email protected]>
---
arch/arm64/kvm/sys_regs.c | 130 +++++++++++++++++++++++++++++++++-----
1 file changed, 114 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 0213c96f73f2..8e8acf3dd9bd 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -215,13 +215,8 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 14

-/*
- * Returns the minimum line size for the selected cache, expressed as
- * Log2(bytes).
- */
-static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
+static u8 __get_min_cache_line_size(u64 ctr, bool icache)
{
- u64 ctr = kvm->arch.ctr_el0;
u8 field;

if (icache)
@@ -240,6 +235,15 @@ static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
return field + 2;
}

+/*
+ * Returns the minimum line size for the selected cache, expressed as
+ * Log2(bytes).
+ */
+static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
+{
+ return __get_min_cache_line_size(kvm->arch.ctr_el0, icache);
+}
+
/* Which cache CCSIDR represents depends on CSSELR value. */
static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
{
@@ -1880,6 +1884,45 @@ static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}

+static const struct sys_reg_desc *get_sys_reg_desc(u32 encoding);
+
+static int validate_clidr_el1(u64 clidr_el1, u64 ctr_el0)
+{
+ u64 idc = !CLIDR_LOC(clidr_el1) ||
+ (!CLIDR_LOUIS(clidr_el1) && !CLIDR_LOUU(clidr_el1));
+
+ if ((clidr_el1 & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int validate_cache_top(struct kvm_vcpu *vcpu, u64 ctr_el0)
+{
+ const struct sys_reg_desc *clidr_el1;
+ unsigned int i;
+ int ret;
+
+ clidr_el1 = get_sys_reg_desc(SYS_CLIDR_EL1);
+ if (!clidr_el1)
+ return -ENOENT;
+
+ ret = validate_clidr_el1(__vcpu_sys_reg(vcpu, clidr_el1->reg), ctr_el0);
+ if (ret)
+ return ret;
+
+ if (!vcpu->arch.ccsidr)
+ return 0;
+
+ for (i = 0; i < CSSELR_MAX; i++) {
+ if ((FIELD_GET(CCSIDR_EL1_LineSize, get_ccsidr(vcpu, i)) + 4)
+ < __get_min_cache_line_size(ctr_el0, i & CSSELR_EL1_InD))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1890,6 +1933,55 @@ static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}

+static u64 reset_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
+{
+ vcpu->kvm->arch.ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ return vcpu->kvm->arch.ctr_el0;
+}
+
+static int get_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 *val)
+{
+ *val = vcpu->kvm->arch.ctr_el0;
+ return 0;
+}
+
+static int set_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
+{
+ u64 ctr = vcpu->kvm->arch.ctr_el0;
+ u64 writable_mask = rd->val;
+ int ret;
+
+ if (val == ctr)
+ return 0;
+
+ if (kvm_vm_has_ran_once(vcpu->kvm))
+ return -EBUSY;
+
+ if ((ctr & ~writable_mask) != (val & ~writable_mask))
+ return -EINVAL;
+
+ if (((ctr & CTR_EL0_DIC_MASK) < (val & CTR_EL0_DIC_MASK)) ||
+ ((ctr & CTR_EL0_IDC_MASK) < (val & CTR_EL0_IDC_MASK)) ||
+ ((ctr & CTR_EL0_DminLine_MASK) < (val & CTR_EL0_DminLine_MASK)) ||
+ ((ctr & CTR_EL0_IminLine_MASK) < (val & CTR_EL0_IminLine_MASK))) {
+ return -EINVAL;
+ }
+
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ ret = validate_cache_top(vcpu, val);
+ if (ret) {
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+ return ret;
+ }
+
+ vcpu->kvm->arch.ctr_el0 = val;
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return 0;
+}
+
static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1959,10 +2051,9 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val)
{
- u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val));
u64 ctr_el0 = vcpu->kvm->arch.ctr_el0;

- if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+ if (validate_clidr_el1(val, ctr_el0))
return -EINVAL;

__vcpu_sys_reg(vcpu, rd->reg) = val;
@@ -2475,7 +2566,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
- { SYS_DESC(SYS_CTR_EL0), access_ctr },
+ { SYS_DESC(SYS_CTR_EL0), access_ctr, .reset = reset_ctr,
+ .get_user = get_ctr, .set_user = set_ctr, .val = (CTR_EL0_DIC_MASK |
+ CTR_EL0_IDC_MASK |
+ CTR_EL0_DminLine_MASK |
+ CTR_EL0_IminLine_MASK)},
{ SYS_DESC(SYS_SVCR), undef_access },

{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
@@ -3651,6 +3746,13 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
}
}

+static const struct sys_reg_desc *get_sys_reg_desc(u32 encoding)
+{
+ struct sys_reg_params params = encoding_to_params(encoding);
+
+ return find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+}
+
const struct sys_reg_desc *get_reg_by_id(u64 id,
const struct sys_reg_desc table[],
unsigned int num)
@@ -3704,18 +3806,11 @@ FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(revidr_el1)
FUNCTION_INVARIANT(aidr_el1)

-static u64 get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
-{
- ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0);
- return ((struct sys_reg_desc *)r)->val;
-}
-
/* ->val is filled in by kvm_sys_reg_table_init() */
static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
- { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
};

static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
@@ -4083,6 +4178,9 @@ static void vcpu_set_hcr(struct kvm_vcpu *vcpu)
*/
if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
vcpu->arch.hcr_el2 |= HCR_TTLBOS;
+
+ if (kvm->arch.ctr_el0 != read_sanitised_ftr_reg(SYS_CTR_EL0))
+ vcpu->arch.hcr_el2 |= HCR_TID2;
}

void kvm_calculate_traps(struct kvm_vcpu *vcpu)
--
2.42.0


2024-05-14 07:30:29

by Sebastian Ott

[permalink] [raw]
Subject: [PATCH v3 5/6] KVM: arm64: rename functions for invariant sys regs

Invariant system id registers are populated with host values
at initialization time using their .reset function cb.

These are currently called get_* which is usually used by
the functions implementing the .get_user callback.

Change their function names to reset_* to reflect what they
are used for.

Signed-off-by: Sebastian Ott <[email protected]>
---
arch/arm64/kvm/sys_regs.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1b6ab483e21e..bae72a4ce72b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3795,8 +3795,8 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
*/

#define FUNCTION_INVARIANT(reg) \
- static u64 get_##reg(struct kvm_vcpu *v, \
- const struct sys_reg_desc *r) \
+ static u64 reset_##reg(struct kvm_vcpu *v, \
+ const struct sys_reg_desc *r) \
{ \
((struct sys_reg_desc *)r)->val = read_sysreg(reg); \
return ((struct sys_reg_desc *)r)->val; \
@@ -3808,9 +3808,9 @@ FUNCTION_INVARIANT(aidr_el1)

/* ->val is filled in by kvm_sys_reg_table_init() */
static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
- { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
- { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
- { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
+ { SYS_DESC(SYS_MIDR_EL1), NULL, reset_midr_el1 },
+ { SYS_DESC(SYS_REVIDR_EL1), NULL, reset_revidr_el1 },
+ { SYS_DESC(SYS_AIDR_EL1), NULL, reset_aidr_el1 },
};

static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
--
2.42.0


2024-05-14 07:34:29

by Sebastian Ott

[permalink] [raw]
Subject: [PATCH v3 1/6] KVM: arm64: unify code to prepare traps

There are 2 functions to calculate traps via HCR_EL2:
* kvm_init_sysreg() called via KVM_RUN (before the 1st run or when
the pid changes)
* vcpu_reset_hcr() called via KVM_ARM_VCPU_INIT

To unify these 2 and to support traps that are dependent on the
ID register configuration, move the code from vcpu_reset_hcr()
to sys_regs.c and call it via kvm_init_sysreg().

We still have to keep the non-FWB handling stuff in vcpu_reset_hcr().
Also the initialization with HCR_GUEST_FLAGS is kept there but guarded
by !vcpu_has_run_once() to ensure that previous calculated values
don't get overwritten.

While at it rename kvm_init_sysreg() to kvm_calculate_traps() to
better reflect what it's doing.

Signed-off-by: Sebastian Ott <[email protected]>
---
arch/arm64/include/asm/kvm_emulate.h | 40 +++++++---------------------
arch/arm64/include/asm/kvm_host.h | 2 +-
arch/arm64/kvm/arm.c | 2 +-
arch/arm64/kvm/sys_regs.c | 34 +++++++++++++++++++++--
4 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 501e3e019c93..84dc3fac9711 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -69,39 +69,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)

static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (has_vhe() || has_hvhe())
- vcpu->arch.hcr_el2 |= HCR_E2H;
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
- /* route synchronous external abort exceptions to EL2 */
- vcpu->arch.hcr_el2 |= HCR_TEA;
- /* trap error record accesses */
- vcpu->arch.hcr_el2 |= HCR_TERR;
- }
+ if (!vcpu_has_run_once(vcpu))
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;

- if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
- vcpu->arch.hcr_el2 |= HCR_FWB;
- } else {
- /*
- * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
- * get set in SCTLR_EL1 such that we can detect when the guest
- * MMU gets turned on and do the necessary cache maintenance
- * then.
- */
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
vcpu->arch.hcr_el2 |= HCR_TVM;
- }
-
- if (cpus_have_final_cap(ARM64_HAS_EVT) &&
- !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
- vcpu->arch.hcr_el2 |= HCR_TID4;
- else
- vcpu->arch.hcr_el2 |= HCR_TID2;
-
- if (vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 &= ~HCR_RW;
-
- if (kvm_has_mte(vcpu->kvm))
- vcpu->arch.hcr_el2 |= HCR_ATA;
}

static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8170c04fde91..212ae77eefaf 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1122,7 +1122,7 @@ int __init populate_nv_trap_config(void);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);

-void kvm_init_sysreg(struct kvm_vcpu *);
+void kvm_calculate_traps(struct kvm_vcpu *);

/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9996a989b52e..6b217afb4e8e 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -797,7 +797,7 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
* This needs to happen after NV has imposed its own restrictions on
* the feature set
*/
- kvm_init_sysreg(vcpu);
+ kvm_calculate_traps(vcpu);

ret = kvm_timer_enable(vcpu);
if (ret)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 22b45a15d068..41741bf4d2b2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -4041,11 +4041,33 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
return 0;
}

-void kvm_init_sysreg(struct kvm_vcpu *vcpu)
+static void vcpu_set_hcr(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;

- mutex_lock(&kvm->arch.config_lock);
+ if (has_vhe() || has_hvhe())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
+ }
+
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_FWB;
+
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_RW;
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;

/*
* In the absence of FGT, we cannot independently trap TLBI
@@ -4054,6 +4076,14 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu)
*/
if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
vcpu->arch.hcr_el2 |= HCR_TTLBOS;
+}
+
+void kvm_calculate_traps(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&kvm->arch.config_lock);
+ vcpu_set_hcr(vcpu);

if (cpus_have_final_cap(ARM64_HAS_HCX)) {
vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS;
--
2.42.0


2024-05-27 08:02:17

by Shaoqin Huang

[permalink] [raw]
Subject: Re: [PATCH v3 1/6] KVM: arm64: unify code to prepare traps

Hi Sebastian,

On 5/14/24 15:22, Sebastian Ott wrote:
> There are 2 functions to calculate traps via HCR_EL2:
> * kvm_init_sysreg() called via KVM_RUN (before the 1st run or when
> the pid changes)
> * vcpu_reset_hcr() called via KVM_ARM_VCPU_INIT
>
> To unify these 2 and to support traps that are dependent on the
> ID register configuration, move the code from vcpu_reset_hcr()
> to sys_regs.c and call it via kvm_init_sysreg().
>
> We still have to keep the non-FWB handling stuff in vcpu_reset_hcr().
> Also the initialization with HCR_GUEST_FLAGS is kept there but guarded
> by !vcpu_has_run_once() to ensure that previous calculated values
> don't get overwritten.
>
> While at it rename kvm_init_sysreg() to kvm_calculate_traps() to
> better reflect what it's doing.
>
> Signed-off-by: Sebastian Ott <[email protected]>
> ---
> arch/arm64/include/asm/kvm_emulate.h | 40 +++++++---------------------
> arch/arm64/include/asm/kvm_host.h | 2 +-
> arch/arm64/kvm/arm.c | 2 +-
> arch/arm64/kvm/sys_regs.c | 34 +++++++++++++++++++++--
> 4 files changed, 43 insertions(+), 35 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index 501e3e019c93..84dc3fac9711 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -69,39 +69,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
>
> static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
> {
> - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
> - if (has_vhe() || has_hvhe())
> - vcpu->arch.hcr_el2 |= HCR_E2H;
> - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
> - /* route synchronous external abort exceptions to EL2 */
> - vcpu->arch.hcr_el2 |= HCR_TEA;
> - /* trap error record accesses */
> - vcpu->arch.hcr_el2 |= HCR_TERR;
> - }
> + if (!vcpu_has_run_once(vcpu))
> + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;

Could you give more explanation in your comments about why we still
keep the non-FWB handling in vcpu_reset_hcr()? That would be better for
understanding the special case.

Thanks,
Shaoqin

>
> - if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
> - vcpu->arch.hcr_el2 |= HCR_FWB;
> - } else {
> - /*
> - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
> - * get set in SCTLR_EL1 such that we can detect when the guest
> - * MMU gets turned on and do the necessary cache maintenance
> - * then.
> - */
> + /*
> + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
> + * get set in SCTLR_EL1 such that we can detect when the guest
> + * MMU gets turned on and do the necessary cache maintenance
> + * then.
> + */
> + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
> vcpu->arch.hcr_el2 |= HCR_TVM;
> - }
> -
> - if (cpus_have_final_cap(ARM64_HAS_EVT) &&
> - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
> - vcpu->arch.hcr_el2 |= HCR_TID4;
> - else
> - vcpu->arch.hcr_el2 |= HCR_TID2;
> -
> - if (vcpu_el1_is_32bit(vcpu))
> - vcpu->arch.hcr_el2 &= ~HCR_RW;
> -
> - if (kvm_has_mte(vcpu->kvm))
> - vcpu->arch.hcr_el2 |= HCR_ATA;
> }
>
> static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8170c04fde91..212ae77eefaf 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -1122,7 +1122,7 @@ int __init populate_nv_trap_config(void);
> bool lock_all_vcpus(struct kvm *kvm);
> void unlock_all_vcpus(struct kvm *kvm);
>
> -void kvm_init_sysreg(struct kvm_vcpu *);
> +void kvm_calculate_traps(struct kvm_vcpu *);
>
> /* MMIO helpers */
> void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9996a989b52e..6b217afb4e8e 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -797,7 +797,7 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
> * This needs to happen after NV has imposed its own restrictions on
> * the feature set
> */
> - kvm_init_sysreg(vcpu);
> + kvm_calculate_traps(vcpu);
>
> ret = kvm_timer_enable(vcpu);
> if (ret)
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 22b45a15d068..41741bf4d2b2 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -4041,11 +4041,33 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
> return 0;
> }
>
> -void kvm_init_sysreg(struct kvm_vcpu *vcpu)
> +static void vcpu_set_hcr(struct kvm_vcpu *vcpu)
> {
> struct kvm *kvm = vcpu->kvm;
>
> - mutex_lock(&kvm->arch.config_lock);
> + if (has_vhe() || has_hvhe())
> + vcpu->arch.hcr_el2 |= HCR_E2H;
> + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
> + /* route synchronous external abort exceptions to EL2 */
> + vcpu->arch.hcr_el2 |= HCR_TEA;
> + /* trap error record accesses */
> + vcpu->arch.hcr_el2 |= HCR_TERR;
> + }
> +
> + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
> + vcpu->arch.hcr_el2 |= HCR_FWB;
> +
> + if (cpus_have_final_cap(ARM64_HAS_EVT) &&
> + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
> + vcpu->arch.hcr_el2 |= HCR_TID4;
> + else
> + vcpu->arch.hcr_el2 |= HCR_TID2;
> +
> + if (vcpu_el1_is_32bit(vcpu))
> + vcpu->arch.hcr_el2 &= ~HCR_RW;
> +
> + if (kvm_has_mte(vcpu->kvm))
> + vcpu->arch.hcr_el2 |= HCR_ATA;
>
> /*
> * In the absence of FGT, we cannot independently trap TLBI
> @@ -4054,6 +4076,14 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu)
> */
> if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
> vcpu->arch.hcr_el2 |= HCR_TTLBOS;
> +}
> +
> +void kvm_calculate_traps(struct kvm_vcpu *vcpu)
> +{
> + struct kvm *kvm = vcpu->kvm;
> +
> + mutex_lock(&kvm->arch.config_lock);
> + vcpu_set_hcr(vcpu);
>
> if (cpus_have_final_cap(ARM64_HAS_HCX)) {
> vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS;

--
Shaoqin


2024-05-27 14:55:11

by Sebastian Ott

[permalink] [raw]
Subject: Re: [PATCH v3 1/6] KVM: arm64: unify code to prepare traps

Hi Shaoqin,

On Mon, 27 May 2024, Shaoqin Huang wrote:
> On 5/14/24 15:22, Sebastian Ott wrote:
>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>> static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
>> {
>> - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
>> - if (has_vhe() || has_hvhe())
>> - vcpu->arch.hcr_el2 |= HCR_E2H;
>> - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
>> - /* route synchronous external abort exceptions to EL2 */
>> - vcpu->arch.hcr_el2 |= HCR_TEA;
>> - /* trap error record accesses */
>> - vcpu->arch.hcr_el2 |= HCR_TERR;
>> - }
>> + if (!vcpu_has_run_once(vcpu))
>> + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
>
> Could you give more explanation in your comments about why we still keep the
> non-FWB handling in vcpu_reset_hcr()? That would be better for understanding
> the special case.
>

VM ops trapping needs to be toggled via KVM_ARM_VCPU_INIT for this case to
catch when the guest activates its MMU. This is different to the other
traps that are setup once before the guest runs for the first time.

Sebastian


2024-05-29 10:37:18

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v3 3/6] KVM: arm64: add emulation for CTR_EL0 register

Hi Sebastian,
On 5/14/24 09:22, Sebastian Ott wrote:
> CTR_EL0 is currently handled as an invariant register, thus
> guests will be presented with the host value of that register.
>
> Add emulation for CTR_EL0 based on a per VM value. Userspace can
> switch off DIC and IDC bits and reduce DminLine and IminLine sizes.
>
> When CTR_EL0 is changed validate that against CLIDR_EL1 and CCSIDR_EL1
> to make sure we present the guest with consistent register values.
> Changes that affect the generated cache topology values are allowed if
> they don't clash with previous register writes.
>
> Signed-off-by: Sebastian Ott <[email protected]>
> ---
> arch/arm64/kvm/sys_regs.c | 130 +++++++++++++++++++++++++++++++++-----
> 1 file changed, 114 insertions(+), 16 deletions(-)
>
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 0213c96f73f2..8e8acf3dd9bd 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -215,13 +215,8 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
> /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
> #define CSSELR_MAX 14
>
> -/*
> - * Returns the minimum line size for the selected cache, expressed as
> - * Log2(bytes).
> - */
> -static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
> +static u8 __get_min_cache_line_size(u64 ctr, bool icache)
> {
> - u64 ctr = kvm->arch.ctr_el0;
> u8 field;
>
> if (icache)
> @@ -240,6 +235,15 @@ static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
> return field + 2;
> }
>
> +/*
> + * Returns the minimum line size for the selected cache, expressed as
> + * Log2(bytes).
> + */
> +static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
> +{
> + return __get_min_cache_line_size(kvm->arch.ctr_el0, icache);
> +}
> +
> /* Which cache CCSIDR represents depends on CSSELR value. */
> static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
> {
> @@ -1880,6 +1884,45 @@ static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
> return 0;
> }
>
> +static const struct sys_reg_desc *get_sys_reg_desc(u32 encoding);
> +
> +static int validate_clidr_el1(u64 clidr_el1, u64 ctr_el0)
> +{
> + u64 idc = !CLIDR_LOC(clidr_el1) ||
> + (!CLIDR_LOUIS(clidr_el1) && !CLIDR_LOUU(clidr_el1));
This actually computes:
CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 &&
CLIDR_EL1.LoUU == 0b000)

referring to ARM ARM
Terminology for Clean, Invalidate, and Clean and Invalidate instructions

1) If the LoC field value is 0x0, this means that no levels of cache
need to be cleaned or invalidated
when cleaning or invalidating to the Point of Coherency.

2) If the LoUU field value is 0x0, this means that no levels of data
cache need to be cleaned or
invalidated when cleaning or invalidating to the Point of Unification.

3) If the LoUIS field value is 0x0, this means that no levels of data or
unified cache need to be
cleaned or invalidated when cleaning or invalidating to the Point of
Unification for the Inner Shareable shareability domain.

so to me, if the above computation is true, this means we have no level of
cache to take care of, so although CTR_EL0.IDC = 0 would normally mean
you must "Data cache clean to the Point of Unification", that is not
needed in that case.

But the spec does not really state that IDC=0 and
no_level_of_cache_to_clean_inv are incompatible as far as I see


> +
> + if ((clidr_el1 & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
> + return -EINVAL;

Isn't (clidr_el1 & CLIDR_EL1_RES0) already checked by

{ SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
.set_user = set_clidr, .val = ~CLIDR_EL1_RES0 },

> +
> + return 0;
> +}
> +
> +static int validate_cache_top(struct kvm_vcpu *vcpu, u64 ctr_el0)
s/top/topology?
> +{
> + const struct sys_reg_desc *clidr_el1;
> + unsigned int i;
> + int ret;
> +
> + clidr_el1 = get_sys_reg_desc(SYS_CLIDR_EL1);
> + if (!clidr_el1)
> + return -ENOENT;
> +
> + ret = validate_clidr_el1(__vcpu_sys_reg(vcpu, clidr_el1->reg), ctr_el0);
> + if (ret)
> + return ret;
> +
> + if (!vcpu->arch.ccsidr)
> + return 0;
> +
It would be worth adding a comment about what this does, as this is not
straightforward ;-)
> + for (i = 0; i < CSSELR_MAX; i++) {
> + if ((FIELD_GET(CCSIDR_EL1_LineSize, get_ccsidr(vcpu, i)) + 4)
maybe use a local variable such as log2_cache_bytes
> + < __get_min_cache_line_size(ctr_el0, i & CSSELR_EL1_InD))
I don't get i & CSSELR_EL1_InD, please can you explain?
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> const struct sys_reg_desc *r)
> {
> @@ -1890,6 +1933,55 @@ static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> return true;
> }
>
> +static u64 reset_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
> +{
> + vcpu->kvm->arch.ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> + return vcpu->kvm->arch.ctr_el0;
> +}
> +
> +static int get_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
> + u64 *val)
> +{
> + *val = vcpu->kvm->arch.ctr_el0;
> + return 0;
> +}
> +
> +static int set_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
> + u64 val)
> +{
don't you need to take the config_lock earlier as in set_id_reg()? isn't
it racy versus has_ran_once?
> + u64 ctr = vcpu->kvm->arch.ctr_el0;
> + u64 writable_mask = rd->val;
> + int ret;
> +
> + if (val == ctr)
> + return 0;
> +
> + if (kvm_vm_has_ran_once(vcpu->kvm))
> + return -EBUSY;
> +
> + if ((ctr & ~writable_mask) != (val & ~writable_mask))
> + return -EINVAL;
> +
> + if (((ctr & CTR_EL0_DIC_MASK) < (val & CTR_EL0_DIC_MASK)) ||
> + ((ctr & CTR_EL0_IDC_MASK) < (val & CTR_EL0_IDC_MASK)) ||
> + ((ctr & CTR_EL0_DminLine_MASK) < (val & CTR_EL0_DminLine_MASK)) ||
> + ((ctr & CTR_EL0_IminLine_MASK) < (val & CTR_EL0_IminLine_MASK))) {
> + return -EINVAL;
> + }
> +
> + mutex_lock(&vcpu->kvm->arch.config_lock);
> + ret = validate_cache_top(vcpu, val);
> + if (ret) {
> + mutex_unlock(&vcpu->kvm->arch.config_lock);
> + return ret;
nit use a goto out
> + }
> +
> + vcpu->kvm->arch.ctr_el0 = val;
out:
> + mutex_unlock(&vcpu->kvm->arch.config_lock);
> +
> + return 0;
> +}
> +
> static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> const struct sys_reg_desc *r)
> {
> @@ -1959,10 +2051,9 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
> static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
> u64 val)
> {
> - u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val));
> u64 ctr_el0 = vcpu->kvm->arch.ctr_el0;
>
> - if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
> + if (validate_clidr_el1(val, ctr_el0))
> return -EINVAL;
>
> __vcpu_sys_reg(vcpu, rd->reg) = val;
> @@ -2475,7 +2566,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> { SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
> { SYS_DESC(SYS_SMIDR_EL1), undef_access },
> { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
> - { SYS_DESC(SYS_CTR_EL0), access_ctr },
> + { SYS_DESC(SYS_CTR_EL0), access_ctr, .reset = reset_ctr,
> + .get_user = get_ctr, .set_user = set_ctr, .val = (CTR_EL0_DIC_MASK |
> + CTR_EL0_IDC_MASK |
> + CTR_EL0_DminLine_MASK |
> + CTR_EL0_IminLine_MASK)},
> { SYS_DESC(SYS_SVCR), undef_access },
>
> { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
> @@ -3651,6 +3746,13 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
> }
> }
>
> +static const struct sys_reg_desc *get_sys_reg_desc(u32 encoding)
> +{
> + struct sys_reg_params params = encoding_to_params(encoding);
> +
> + return find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
> +}
> +
> const struct sys_reg_desc *get_reg_by_id(u64 id,
> const struct sys_reg_desc table[],
> unsigned int num)
> @@ -3704,18 +3806,11 @@ FUNCTION_INVARIANT(midr_el1)
> FUNCTION_INVARIANT(revidr_el1)
> FUNCTION_INVARIANT(aidr_el1)
>
> -static u64 get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
> -{
> - ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0);
> - return ((struct sys_reg_desc *)r)->val;
> -}
> -
> /* ->val is filled in by kvm_sys_reg_table_init() */
> static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
> { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
> { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
> { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
> - { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
> };
>
> static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
> @@ -4083,6 +4178,9 @@ static void vcpu_set_hcr(struct kvm_vcpu *vcpu)
> */
> if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
> vcpu->arch.hcr_el2 |= HCR_TTLBOS;
> +
> + if (kvm->arch.ctr_el0 != read_sanitised_ftr_reg(SYS_CTR_EL0))
> + vcpu->arch.hcr_el2 |= HCR_TID2;
> }
>
> void kvm_calculate_traps(struct kvm_vcpu *vcpu)

Thanks

Eric


2024-05-29 13:59:02

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v3 5/6] KVM: arm64: rename functions for invariant sys regs

Hi Sebastian,

On 5/14/24 09:22, Sebastian Ott wrote:
> Invariant system id registers are populated with host values
> at initialization time using their .reset function cb.
get_##reg call read_sysreg(). I don't really understand the above
sentence. Please could you elaborate?

Thanks

Eric
>
> These are currently called get_* which is usually used by
> the functions implementing the .get_user callback.
>
> Change their function names to reset_* to reflect what they
> are used for.
>
> Signed-off-by: Sebastian Ott <[email protected]>
> ---
> arch/arm64/kvm/sys_regs.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 1b6ab483e21e..bae72a4ce72b 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -3795,8 +3795,8 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
> */
>
> #define FUNCTION_INVARIANT(reg) \
> - static u64 get_##reg(struct kvm_vcpu *v, \
> - const struct sys_reg_desc *r) \
> + static u64 reset_##reg(struct kvm_vcpu *v, \
> + const struct sys_reg_desc *r) \
> { \
> ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \
> return ((struct sys_reg_desc *)r)->val; \
> @@ -3808,9 +3808,9 @@ FUNCTION_INVARIANT(aidr_el1)
>
> /* ->val is filled in by kvm_sys_reg_table_init() */
> static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
> - { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
> - { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
> - { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
> + { SYS_DESC(SYS_MIDR_EL1), NULL, reset_midr_el1 },
> + { SYS_DESC(SYS_REVIDR_EL1), NULL, reset_revidr_el1 },
> + { SYS_DESC(SYS_AIDR_EL1), NULL, reset_aidr_el1 },
> };
>
> static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)


2024-05-29 15:47:02

by Sebastian Ott

[permalink] [raw]
Subject: Re: [PATCH v3 5/6] KVM: arm64: rename functions for invariant sys regs

Hej Eric,

On Wed, 29 May 2024, Eric Auger wrote:
> On 5/14/24 09:22, Sebastian Ott wrote:
>> Invariant system id registers are populated with host values
>> at initialization time using their .reset function cb.
> get_##reg call read_sysreg(). I don't really understand the above
> sentence. Please could you elaborate?
>

struct sys_reg_desc has 2 function pointers (among others):
.reset and .get_user . The functions implementing these are
usually named accordingly. For invariant registers only
.reset is used but set to functions that are called get_*
(which is usually used to implement .get_user).

E.g.: invariant_sys_regs[0].reset == get_midr_el1

When trying to figure out this code I was confused by this, hence
this patch..

Sebastian


2024-05-29 17:19:30

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v3 5/6] KVM: arm64: rename functions for invariant sys regs

Hi Sebastian,

On 5/29/24 17:29, Sebastian Ott wrote:
> Hej Eric,
>
> On Wed, 29 May 2024, Eric Auger wrote:
>> On 5/14/24 09:22, Sebastian Ott wrote:
>>> Invariant system id registers are populated with host values
>>> at initialization time using their .reset function cb.
>> get_##reg call read_sysreg(). I don't really understand the above
>> sentence. Please could you elaborate?
>>
>
> struct sys_reg_desc has 2 function pointers (among others):
> .reset and .get_user . The functions implementing these are
> usually named accordingly. For invariant registers only
> .reset is used but set to functions that are called get_*
> (which is usually used to implement .get_user).
>
> E.g.: invariant_sys_regs[0].reset == get_midr_el1
>
> When trying to figure out this code I was confused by this, hence
> this patch..

Ah OK, since ".reset=" was not used in the initialization I missed that.

feel free to add my
Reviewed-by: Eric Auger <[email protected]>


Eric
>
> Sebastian
>


2024-05-30 13:03:14

by Sebastian Ott

[permalink] [raw]
Subject: Re: [PATCH v3 3/6] KVM: arm64: add emulation for CTR_EL0 register

Hej Eric,

On Wed, 29 May 2024, Eric Auger wrote:
> On 5/14/24 09:22, Sebastian Ott wrote:
>> +static int validate_clidr_el1(u64 clidr_el1, u64 ctr_el0)
>> +{
>> + u64 idc = !CLIDR_LOC(clidr_el1) ||
>> + (!CLIDR_LOUIS(clidr_el1) && !CLIDR_LOUU(clidr_el1));
> This actually computes:
> CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 &&
> CLIDR_EL1.LoUU == 0b000)
>
> referring to ARM ARM
> Terminology for Clean, Invalidate, and Clean and Invalidate instructions
>
> 1) If the LoC field value is 0x0, this means that no levels of cache
> need to cleaned or invalidated
> when cleaning or invalidating to the Point of Coherency.
>
> 2) If the LoUU field value is 0x0, this means that no levels of data
> cache need to be cleaned or
> invalidated when cleaning or invalidating to the Point of Unification.
>
> 3) If the LoUIS field value is 0x0, this means that no levels of data or
> unified cache need to
> cleaned or invalidated when cleaning or invalidating to the Point of
> Unification for the Inner Shareable shareability domain.
>
> so to me if above computation is true this means we have no level of
> cache to take care of, so although CTR_EL0.IDC = 0 would normally mean
> you must "Data cache clean to the Point of Unification" that is not
> needed in that case.
>
> But the spec does not really state that IDC=0 and
> no_level_of_cache_to_clean_inv are incompatible as far as I see

This is just existing code moved to a helper..

>> + if ((clidr_el1 & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
>> + return -EINVAL;
>
> Isn't (clidr_el1 & CLIDR_EL1_RES0) already checked by
>
> { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
> .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 },
>

Nope, that would only be the case when arm64_check_features()
is used (by having set_id_reg() for the .set_user callback).

>> +static int validate_cache_top(struct kvm_vcpu *vcpu, u64 ctr_el0)
> s/top/topology?

Hm, that name is already quite long.

>> +{
>> + const struct sys_reg_desc *clidr_el1;
>> + unsigned int i;
>> + int ret;
>> +
>> + clidr_el1 = get_sys_reg_desc(SYS_CLIDR_EL1);
>> + if (!clidr_el1)
>> + return -ENOENT;
>> +
>> + ret = validate_clidr_el1(__vcpu_sys_reg(vcpu, clidr_el1->reg), ctr_el0);
>> + if (ret)
>> + return ret;
>> +
>> + if (!vcpu->arch.ccsidr)
>> + return 0;
>> +
> worth adding a comment about what this does as this is not
> straightforward ;-)

Hm, "check for validity of the cache topology" - that's kinda the
functions name, so no added value. "Make sure the cache line size
per level obeys the minimum cache line setting" - would this help?
Can't think of smth else right now, sry. Suggestions?

>> + for (i = 0; i < CSSELR_MAX; i++) {
>> + if ((FIELD_GET(CCSIDR_EL1_LineSize, get_ccsidr(vcpu, i)) + 4)
> maybe use a local variable such as log2_cache_bytes
>> + < __get_min_cache_line_size(ctr_el0, i & CSSELR_EL1_InD))
> I don't get i & CSSELR_EL1_InD, please can you explain?

It flags the cache at this level as a data or instruction cache (see also
get_ccsidr()).

>> +static int set_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
>> + u64 val)
>> +{
> don't you need to take the config_lock earlier as in set_id_reg()? isn't
> it racy versus has_ran_once?

I was about to write that this is not the case since that's an rcu
accessed pointer not guarded by the config lock but I confused this
with the vcpu_has_run_once() .... again :-(

I'm not a 100% sure we really need that but I'll just move the lock up -
it definitely doesn't hurt.

>> + u64 ctr = vcpu->kvm->arch.ctr_el0;
>> + u64 writable_mask = rd->val;
>> + int ret;
>> +
>> + if (val == ctr)
>> + return 0;
>> +
>> + if (kvm_vm_has_ran_once(vcpu->kvm))
>> + return -EBUSY;
>> +
>> + if ((ctr & ~writable_mask) != (val & ~writable_mask))
>> + return -EINVAL;
>> +
>> + if (((ctr & CTR_EL0_DIC_MASK) < (val & CTR_EL0_DIC_MASK)) ||
>> + ((ctr & CTR_EL0_IDC_MASK) < (val & CTR_EL0_IDC_MASK)) ||
>> + ((ctr & CTR_EL0_DminLine_MASK) < (val & CTR_EL0_DminLine_MASK)) ||
>> + ((ctr & CTR_EL0_IminLine_MASK) < (val & CTR_EL0_IminLine_MASK))) {
>> + return -EINVAL;
>> + }
>> +
>> + mutex_lock(&vcpu->kvm->arch.config_lock);
>> + ret = validate_cache_top(vcpu, val);
>> + if (ret) {
>> + mutex_unlock(&vcpu->kvm->arch.config_lock);
>> + return ret;
> nit use a goto out

Thanks,
Sebastian


2024-05-30 16:55:13

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v3 1/6] KVM: arm64: unify code to prepare traps

Hi Sebastian,

On 5/14/24 09:22, Sebastian Ott wrote:
> There are 2 functions to calculate traps via HCR_EL2:
> * kvm_init_sysreg() called via KVM_RUN (before the 1st run or when
> the pid changes)
> * vcpu_reset_hcr() called via KVM_ARM_VCPU_INIT
>
> To unify these 2 and to support traps that are dependent on the
> ID register configuration, move the code from vcpu_reset_hcr()
> to sys_regs.c and call it via kvm_init_sysreg().
>
> We still have to keep the non-FWB handling stuff in vcpu_reset_hcr().
> Also the initialization with HCR_GUEST_FLAGS is kept there but guarded
> by !vcpu_has_run_once() to ensure that previous calculated values
> don't get overwritten.
>
> While at it rename kvm_init_sysreg() to kvm_calculate_traps() to
> better reflect what it's doing.
>
> Signed-off-by: Sebastian Ott <[email protected]>

Looks good to me
Reviewed-by: Eric Auger <[email protected]>

Eric

> ---
> arch/arm64/include/asm/kvm_emulate.h | 40 +++++++---------------------
> arch/arm64/include/asm/kvm_host.h | 2 +-
> arch/arm64/kvm/arm.c | 2 +-
> arch/arm64/kvm/sys_regs.c | 34 +++++++++++++++++++++--
> 4 files changed, 43 insertions(+), 35 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index 501e3e019c93..84dc3fac9711 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -69,39 +69,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
>
> static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
> {
> - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
> - if (has_vhe() || has_hvhe())
> - vcpu->arch.hcr_el2 |= HCR_E2H;
> - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
> - /* route synchronous external abort exceptions to EL2 */
> - vcpu->arch.hcr_el2 |= HCR_TEA;
> - /* trap error record accesses */
> - vcpu->arch.hcr_el2 |= HCR_TERR;
> - }
> + if (!vcpu_has_run_once(vcpu))
> + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
>
> - if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
> - vcpu->arch.hcr_el2 |= HCR_FWB;
> - } else {
> - /*
> - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
> - * get set in SCTLR_EL1 such that we can detect when the guest
> - * MMU gets turned on and do the necessary cache maintenance
> - * then.
> - */
> + /*
> + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
> + * get set in SCTLR_EL1 such that we can detect when the guest
> + * MMU gets turned on and do the necessary cache maintenance
> + * then.
> + */
> + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
> vcpu->arch.hcr_el2 |= HCR_TVM;
> - }
> -
> - if (cpus_have_final_cap(ARM64_HAS_EVT) &&
> - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
> - vcpu->arch.hcr_el2 |= HCR_TID4;
> - else
> - vcpu->arch.hcr_el2 |= HCR_TID2;
> -
> - if (vcpu_el1_is_32bit(vcpu))
> - vcpu->arch.hcr_el2 &= ~HCR_RW;
> -
> - if (kvm_has_mte(vcpu->kvm))
> - vcpu->arch.hcr_el2 |= HCR_ATA;
> }
>
> static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8170c04fde91..212ae77eefaf 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -1122,7 +1122,7 @@ int __init populate_nv_trap_config(void);
> bool lock_all_vcpus(struct kvm *kvm);
> void unlock_all_vcpus(struct kvm *kvm);
>
> -void kvm_init_sysreg(struct kvm_vcpu *);
> +void kvm_calculate_traps(struct kvm_vcpu *);
>
> /* MMIO helpers */
> void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9996a989b52e..6b217afb4e8e 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -797,7 +797,7 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
> * This needs to happen after NV has imposed its own restrictions on
> * the feature set
> */
> - kvm_init_sysreg(vcpu);
> + kvm_calculate_traps(vcpu);
>
> ret = kvm_timer_enable(vcpu);
> if (ret)
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 22b45a15d068..41741bf4d2b2 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -4041,11 +4041,33 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
> return 0;
> }
>
> -void kvm_init_sysreg(struct kvm_vcpu *vcpu)
> +static void vcpu_set_hcr(struct kvm_vcpu *vcpu)
> {
> struct kvm *kvm = vcpu->kvm;
>
> - mutex_lock(&kvm->arch.config_lock);
> + if (has_vhe() || has_hvhe())
> + vcpu->arch.hcr_el2 |= HCR_E2H;
> + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
> + /* route synchronous external abort exceptions to EL2 */
> + vcpu->arch.hcr_el2 |= HCR_TEA;
> + /* trap error record accesses */
> + vcpu->arch.hcr_el2 |= HCR_TERR;
> + }
> +
> + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
> + vcpu->arch.hcr_el2 |= HCR_FWB;
> +
> + if (cpus_have_final_cap(ARM64_HAS_EVT) &&
> + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
> + vcpu->arch.hcr_el2 |= HCR_TID4;
> + else
> + vcpu->arch.hcr_el2 |= HCR_TID2;
> +
> + if (vcpu_el1_is_32bit(vcpu))
> + vcpu->arch.hcr_el2 &= ~HCR_RW;
> +
> + if (kvm_has_mte(vcpu->kvm))
> + vcpu->arch.hcr_el2 |= HCR_ATA;
>
> /*
> * In the absence of FGT, we cannot independently trap TLBI
> @@ -4054,6 +4076,14 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu)
> */
> if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
> vcpu->arch.hcr_el2 |= HCR_TTLBOS;
> +}
> +
> +void kvm_calculate_traps(struct kvm_vcpu *vcpu)
> +{
> + struct kvm *kvm = vcpu->kvm;
> +
> + mutex_lock(&kvm->arch.config_lock);
> + vcpu_set_hcr(vcpu);
>
> if (cpus_have_final_cap(ARM64_HAS_HCX)) {
> vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS;


2024-05-30 17:21:11

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v3 3/6] KVM: arm64: add emulation for CTR_EL0 register



On 5/30/24 14:56, Sebastian Ott wrote:
> Hej Eric,
>
> On Wed, 29 May 2024, Eric Auger wrote:
>> On 5/14/24 09:22, Sebastian Ott wrote:
>>> +static int validate_clidr_el1(u64 clidr_el1, u64 ctr_el0)
>>> +{
>>> +    u64 idc = !CLIDR_LOC(clidr_el1) ||
>>> +          (!CLIDR_LOUIS(clidr_el1) && !CLIDR_LOUU(clidr_el1));
>> This actually computes:
>> CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 &&
>> CLIDR_EL1.LoUU == 0b000)
>>
>> refering to ARM ARM
>> Terminology for Clean, Invalidate, and Clean and Invalidate instructions
>>
>> 1) If the LoC field value is 0x0, this means that no levels of cache
>> need to cleaned or invalidated
>> when cleaning or invalidating to the Point of Coherency.
>>
>> 2) If the LoUU field value is 0x0, this means that no levels of data
>> cache need to be cleaned or
>> invalidated when cleaning or invalidating to the Point of Unification.
>>
>> 3) If the LoUIS field value is 0x0, this means that no levels of data or
>> unified cache need to
>> cleaned or invalidated when cleaning or invalidating to the Point of
>> Unification for the Inner Shareable shareability domain.
>>
>> so to me if above computation is true this means who have no level of
>> cache to take care of, so although CTR_EL0.IDC = 0 would normally mean
>> you must "Data cache clean to the Point of Unification" that is not
>> needed in that case.
>>
>> But the spec does not really state that IDC=0 and
>> no_level_of_cache_to_clean_inv are incompatible as far as I see
>
> This is just existing code moved to a helper..
agreed this comment/question is not related to your patch

>
>>> +    if ((clidr_el1 & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
>>> +        return -EINVAL;
>>
>> Isn't (clidr_el1 & CLIDR_EL1_RES0) already checked by
>>
>>        { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
>>          .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 },
>>
>
> Nope, that would only be the case when arm64_check_features()
> is used (by having set_id_reg() for the .set_user callback).
OK
>
>>> +static int validate_cache_top(struct kvm_vcpu *vcpu, u64 ctr_el0)
>> s/top/topology?
>
> Hm, that name is already quiet long.
yes but top does not mean much
>
>>> +{
>>> +    const struct sys_reg_desc *clidr_el1;
>>> +    unsigned int i;
>>> +    int ret;
>>> +
>>> +    clidr_el1 = get_sys_reg_desc(SYS_CLIDR_EL1);
>>> +    if (!clidr_el1)
>>> +        return -ENOENT;
>>> +
>>> +    ret = validate_clidr_el1(__vcpu_sys_reg(vcpu, clidr_el1->reg),
>>> ctr_el0);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    if (!vcpu->arch.ccsidr)
>>> +        return 0;
>>> +
>> worth to add a comment about what this does as this is not
>> straighforward ;-)
>
> Hm, "check for validity of the cache topology" - that's kinda the
> functions name, so no added value. "Make sure the cache line size
> per level obeys the minimum cache line setting" - would this help?
> Can't think of smth else right now, sry. Suggestions?
yes the latter is fine to me
>
>>> +    for (i = 0; i < CSSELR_MAX; i++) {
>>> +        if ((FIELD_GET(CCSIDR_EL1_LineSize, get_ccsidr(vcpu, i)) + 4)
>> maybe use a local variable such as log2_cache_bytes
>>> +            < __get_min_cache_line_size(ctr_el0, i & CSSELR_EL1_InD))
>> I don't get i & CSSELR_EL1_InD, please can you explain?
>
> It flags the cache at this level as a data or instruction cache (see also
> get_ccsidr()).
OK I understand the principle now. thank you
>
>>> +static int set_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc
>>> *rd,
>>> +           u64 val)
>>> +{
>> don't you need to take the config_lock earlier as in set_id_reg()? isn't
>> it racy versus has_ran_once?
>
> I was about to write that this is not the case since that's an rcu
> accessed pointer not guarded by the config lock but I confused this
> with the vcpu_has_run_once() .... again :-(
>
> I'm not a 100% sure we really need that but I'll just move the lock up -
> it definitely doesn't hurt.
yup

Eric
>
>>> +    u64 ctr = vcpu->kvm->arch.ctr_el0;
>>> +    u64 writable_mask = rd->val;
>>> +    int ret;
>>> +
>>> +    if (val == ctr)
>>> +        return 0;
>>> +
>>> +    if (kvm_vm_has_ran_once(vcpu->kvm))
>>> +        return -EBUSY;
>>> +
>>> +    if ((ctr & ~writable_mask) != (val & ~writable_mask))
>>> +        return -EINVAL;
>>> +
>>> +    if (((ctr & CTR_EL0_DIC_MASK) < (val & CTR_EL0_DIC_MASK)) ||
>>> +        ((ctr & CTR_EL0_IDC_MASK) < (val & CTR_EL0_IDC_MASK)) ||
>>> +        ((ctr & CTR_EL0_DminLine_MASK) < (val &
>>> CTR_EL0_DminLine_MASK)) ||
>>> +        ((ctr & CTR_EL0_IminLine_MASK) < (val &
>>> CTR_EL0_IminLine_MASK))) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    mutex_lock(&vcpu->kvm->arch.config_lock);
>>> +    ret = validate_cache_top(vcpu, val);
>>> +    if (ret) {
>>> +        mutex_unlock(&vcpu->kvm->arch.config_lock);
>>> +        return ret;
>> nit use a goto out
>
> Thanks,
> Sebastian
>