2024-06-06 15:49:15

by Mark Brown

[permalink] [raw]
Subject: [PATCH v2 0/4] KVM: arm64: Fix underallocation of storage for SVE state

As observed during review, the pKVM support for saving host SVE state is
broken if an asymmetric system has VLs larger than the maximum shared
VL. Fix this by discovering and then using the maximum VL for
allocations, and by using RDVL during the save/restore process.

Signed-off-by: Mark Brown <[email protected]>
---
Changes in v2:
- Downgrade the check for a late CPU increasing the maximum VL to a warning
only, but do it unconditionally since pKVM prevents late CPUs anyway.
- Commit log tweaks.
- Link to v1: https://lore.kernel.org/r/[email protected]

---
Mark Brown (4):
arm64/fpsimd: Introduce __bit_to_vl() helper
arm64/fpsimd: Discover maximum vector length implemented by any CPU
KVM: arm64: Fix FFR offset calculation for pKVM host state save and restore
KVM: arm64: Avoid underallocating storage for host SVE state

arch/arm64/include/asm/fpsimd.h | 17 +++++++++++++++
arch/arm64/include/asm/kvm_host.h | 2 +-
arch/arm64/include/asm/kvm_hyp.h | 3 ++-
arch/arm64/include/asm/kvm_pkvm.h | 2 +-
arch/arm64/kernel/fpsimd.c | 38 +++++++++++++++++++++++++++------
arch/arm64/kvm/hyp/fpsimd.S | 5 +++++
arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +-
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++---
arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +-
arch/arm64/kvm/reset.c | 6 +++---
10 files changed, 65 insertions(+), 18 deletions(-)
---
base-commit: afb91f5f8ad7af172d993a34fde1947892408f53
change-id: 20240604-kvm-arm64-fix-pkvm-sve-vl-13cd71fd7db0

Best regards,
--
Mark Brown <[email protected]>



2024-06-06 16:03:57

by Mark Brown

[permalink] [raw]
Subject: [PATCH v2 2/4] arm64/fpsimd: Discover maximum vector length implemented by any CPU

When discovering the vector lengths for SVE and SME we do not currently
record the maximum VL supported on any individual CPU. This is expected
to be the same for all CPUs, but the architecture allows asymmetry; if we
do encounter an asymmetric system then some CPUs may support VLs higher
than the maximum Linux will use. Since the pKVM hypervisor needs to
support saving and restoring anything the host can physically set, it
needs to know the maximum value any CPU could have. Add support for
enumerating it and validation for late CPUs.

Signed-off-by: Mark Brown <[email protected]>
---
arch/arm64/include/asm/fpsimd.h | 13 +++++++++++++
arch/arm64/kernel/fpsimd.c | 26 +++++++++++++++++++++++++-
2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 51c21265b4fa..cd19713c9deb 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -188,6 +188,9 @@ struct vl_info {
int max_vl;
int max_virtualisable_vl;

+ /* Maximum vector length observed on any CPU */
+ int max_cpu_vl;
+
/*
* Set of available vector lengths,
* where length vq encoded as bit __vq_to_bit(vq):
@@ -278,6 +281,11 @@ static inline int vec_max_virtualisable_vl(enum vec_type type)
return vl_info[type].max_virtualisable_vl;
}

+static inline int vec_max_cpu_vl(enum vec_type type)
+{
+ return vl_info[type].max_cpu_vl;
+}
+
static inline int sve_max_vl(void)
{
return vec_max_vl(ARM64_VEC_SVE);
@@ -288,6 +296,11 @@ static inline int sve_max_virtualisable_vl(void)
return vec_max_virtualisable_vl(ARM64_VEC_SVE);
}

+static inline int sve_max_cpu_vl(void)
+{
+ return vec_max_cpu_vl(ARM64_VEC_SVE);
+}
+
/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
static inline bool vq_available(enum vec_type type, unsigned int vq)
{
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 22542fb81812..ee6fb8c4b16d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -129,6 +129,7 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
.min_vl = SVE_VL_MIN,
.max_vl = SVE_VL_MIN,
.max_virtualisable_vl = SVE_VL_MIN,
+ .max_cpu_vl = SVE_VL_MIN,
},
#endif
#ifdef CONFIG_ARM64_SME
@@ -1041,8 +1042,13 @@ static void vec_probe_vqs(struct vl_info *info,
void __init vec_init_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
+ unsigned long b;
+
vec_probe_vqs(info, info->vq_map);
bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
+
+ b = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_cpu_vl = __bit_to_vl(b);
}

/*
@@ -1054,11 +1060,16 @@ void vec_update_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+ unsigned long b;

vec_probe_vqs(info, tmp_map);
bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
SVE_VQ_MAX);
+
+ b = find_first_bit(tmp_map, SVE_VQ_MAX);
+ if (__bit_to_vl(b) > info->max_cpu_vl)
+ info->max_cpu_vl = __bit_to_vl(b);
}

/*
@@ -1069,10 +1080,23 @@ int vec_verify_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
- unsigned long b;
+ unsigned long b, max_vl;

vec_probe_vqs(info, tmp_map);

+ /*
+ * Currently the maximum VL is only used for pKVM which
+ * doesn't allow late CPUs but we don't expect asymmetry and
+ * if we encounter any then future users will need handling so
+ * warn if we see anything.
+ */
+ max_vl = __bit_to_vl(find_first_bit(tmp_map, SVE_VQ_MAX));
+ if (max_vl > info->max_cpu_vl) {
+ pr_warn("%s: cpu%d: increases maximum VL to %u\n",
+ info->name, smp_processor_id(), max_vl);
+ info->max_cpu_vl = max_vl;
+ }
+
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
pr_warn("%s: cpu%d: Required vector length(s) missing\n",

--
2.39.2


2024-06-07 07:35:34

by Fuad Tabba

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] arm64/fpsimd: Discover maximum vector length implemented by any CPU

Hi Mark,

On Thu, Jun 6, 2024 at 4:32 PM Mark Brown <[email protected]> wrote:
>
> When discovering the vector lengths for SVE and SME we do not currently
> record the maximum VL supported on any individual CPU. This is expected
> to be the same for all CPUs but the architecture allows asymmetry, if we
> do encounter an asymmetric system then some CPUs may support VLs higher
> than the maximum Linux will use. Since the pKVM hypervisor needs to
> support saving and restoring anything the host can physically set it
> needs to know the maximum value any CPU could have, add support for
> enumerating it and validation for late CPUs.
>
> Signed-off-by: Mark Brown <[email protected]>
> ---
> arch/arm64/include/asm/fpsimd.h | 13 +++++++++++++
> arch/arm64/kernel/fpsimd.c | 26 +++++++++++++++++++++++++-
> 2 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 51c21265b4fa..cd19713c9deb 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -188,6 +188,9 @@ struct vl_info {
> int max_vl;
> int max_virtualisable_vl;
>
> + /* Maximum vector length observed on any CPU */
> + int max_cpu_vl;
> +
> /*
> * Set of available vector lengths,
> * where length vq encoded as bit __vq_to_bit(vq):
> @@ -278,6 +281,11 @@ static inline int vec_max_virtualisable_vl(enum vec_type type)
> return vl_info[type].max_virtualisable_vl;
> }
>
> +static inline int vec_max_cpu_vl(enum vec_type type)
> +{
> + return vl_info[type].max_cpu_vl;
> +}
> +
> static inline int sve_max_vl(void)
> {
> return vec_max_vl(ARM64_VEC_SVE);
> @@ -288,6 +296,11 @@ static inline int sve_max_virtualisable_vl(void)
> return vec_max_virtualisable_vl(ARM64_VEC_SVE);
> }
>
> +static inline int sve_max_cpu_vl(void)
> +{
> + return vec_max_cpu_vl(ARM64_VEC_SVE);
> +}
> +
> /* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
> static inline bool vq_available(enum vec_type type, unsigned int vq)
> {
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 22542fb81812..ee6fb8c4b16d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -129,6 +129,7 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
> .min_vl = SVE_VL_MIN,
> .max_vl = SVE_VL_MIN,
> .max_virtualisable_vl = SVE_VL_MIN,
> + .max_cpu_vl = SVE_VL_MIN,
> },
> #endif
> #ifdef CONFIG_ARM64_SME
> @@ -1041,8 +1042,13 @@ static void vec_probe_vqs(struct vl_info *info,
> void __init vec_init_vq_map(enum vec_type type)
> {
> struct vl_info *info = &vl_info[type];
> + unsigned long b;
> +
> vec_probe_vqs(info, info->vq_map);
> bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
> +
> + b = find_first_bit(info->vq_map, SVE_VQ_MAX);
> + info->max_cpu_vl = __bit_to_vl(b);
> }
>
> /*
> @@ -1054,11 +1060,16 @@ void vec_update_vq_map(enum vec_type type)
> {
> struct vl_info *info = &vl_info[type];
> DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> + unsigned long b;
>
> vec_probe_vqs(info, tmp_map);
> bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
> bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
> SVE_VQ_MAX);
> +
> + b = find_first_bit(tmp_map, SVE_VQ_MAX);
> + if (__bit_to_vl(b) > info->max_cpu_vl)
> + info->max_cpu_vl = __bit_to_vl(b);
> }
>
> /*
> @@ -1069,10 +1080,23 @@ int vec_verify_vq_map(enum vec_type type)
> {
> struct vl_info *info = &vl_info[type];
> DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> - unsigned long b;
> + unsigned long b, max_vl;
>
> vec_probe_vqs(info, tmp_map);
>
> + /*
> + * Currently the maximum VL is only used for pKVM which
> + * doesn't allow late CPUs but we don't expect asymmetry and
> + * if we encounter any then future users will need handling so
> + * warn if we see anything.
> + */
> + max_vl = __bit_to_vl(find_first_bit(tmp_map, SVE_VQ_MAX));
> + if (max_vl > info->max_cpu_vl) {
> + pr_warn("%s: cpu%d: increases maximum VL to %u\n",

This should be %lu since it's an unsigned long. Otherwise it doesn't
build (clang).

Cheers,
/fuad

> + info->name, smp_processor_id(), max_vl);
> + info->max_cpu_vl = max_vl;
> + }
> +
> bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
> pr_warn("%s: cpu%d: Required vector length(s) missing\n",
>
> --
> 2.39.2
>