To add support for encrypted, SEV, guests in the ucall framework
introduce a new "ucall pool" implementation. This was suggested in
the thread on "[RFC PATCH 00/10] KVM: selftests: Add support for
test-selectable ucall implementations". Using a listed as suggested
there doesn't work well because the list is setup using HVAs not GVAs
so use a bitmap + array solution instead to get the same pool of ucall
structs result.
This allows for guests with encryption enabled set up a pool of ucall
structs in the guest's shared memory region.
Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Peter Gonda <[email protected]>
---
.../selftests/kvm/include/kvm_util_base.h | 2 +
.../selftests/kvm/include/ucall_common.h | 13 +--
.../testing/selftests/kvm/lib/aarch64/ucall.c | 10 +-
tools/testing/selftests/kvm/lib/riscv/ucall.c | 5 +-
tools/testing/selftests/kvm/lib/s390x/ucall.c | 5 +-
.../testing/selftests/kvm/lib/ucall_common.c | 108 +++++++++++++++++-
.../testing/selftests/kvm/lib/x86_64/ucall.c | 6 +-
7 files changed, 131 insertions(+), 18 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 1a84d2d1d85b..baede0d118c5 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -103,6 +103,8 @@ struct kvm_vm {
int stats_fd;
struct kvm_stats_header stats_header;
struct kvm_stats_desc *stats_desc;
+
+ bool use_ucall_pool;
};
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 63bfc60be995..002a22e1cd1d 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -22,6 +22,9 @@ enum {
struct ucall {
uint64_t cmd;
uint64_t args[UCALL_MAX_ARGS];
+
+ /* For ucall pool usage. */
+ struct ucall *hva;
};
void ucall_arch_init(struct kvm_vm *vm, void *arg);
@@ -32,15 +35,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
void ucall(uint64_t cmd, int nargs, ...);
uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
-static inline void ucall_init(struct kvm_vm *vm, void *arg)
-{
- ucall_arch_init(vm, arg);
-}
+void ucall_init(struct kvm_vm *vm, void *arg);
-static inline void ucall_uninit(struct kvm_vm *vm)
-{
- ucall_arch_uninit(vm);
-}
+void ucall_uninit(struct kvm_vm *vm);
#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 132c0e98bf49..ee70531e8e51 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -81,12 +81,16 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_MMIO &&
run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
- vm_vaddr_t gva;
+ uint64_t ucall_addr;
TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
"Unexpected ucall exit mmio address access");
- memcpy(&gva, run->mmio.data, sizeof(gva));
- return addr_gva2hva(vcpu->vm, gva);
+ memcpy(&ucall_addr, run->mmio.data, sizeof(ucall_addr));
+
+ if (vcpu->vm->use_ucall_pool)
+ return (void *)ucall_addr;
+ else
+ return addr_gva2hva(vcpu->vm, ucall_addr);
}
return NULL;
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 37e091d4366e..4bb5616df29f 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -59,7 +59,10 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
switch (run->riscv_sbi.function_id) {
case KVM_RISCV_SELFTESTS_SBI_UCALL:
- return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]);
+ if (vcpu->vm->use_ucall_pool)
+ return (void *)run->riscv_sbi.args[0];
+ else
+ return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]);
case KVM_RISCV_SELFTESTS_SBI_UNEXP:
vcpu_dump(stderr, vcpu, 2);
TEST_ASSERT(0, "Unexpected trap taken by guest");
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index 0f695a031d35..b24c6649877a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -30,7 +30,10 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
(run->s390_sieic.ipb >> 16) == 0x501) {
int reg = run->s390_sieic.ipa & 0xf;
- return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]);
+ if (vcpu->vm->use_ucall_pool)
+ return (void *)run->s.regs.gprs[reg];
+ else
+ return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]);
}
return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index ced480860746..b6502a9420c4 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -1,22 +1,122 @@
// SPDX-License-Identifier: GPL-2.0-only
#include "kvm_util.h"
+#include "linux/types.h"
+#include "linux/bitmap.h"
+#include "linux/atomic.h"
+
+struct ucall_header {
+ DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
+ struct ucall ucalls[KVM_MAX_VCPUS];
+};
+
+static bool use_ucall_pool;
+static struct ucall_header *ucall_pool;
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+ struct ucall *uc;
+ struct ucall_header *hdr;
+ vm_vaddr_t vaddr;
+ int i;
+
+ use_ucall_pool = vm->use_ucall_pool;
+ sync_global_to_guest(vm, use_ucall_pool);
+ if (!use_ucall_pool)
+ goto out;
+
+ TEST_ASSERT(!ucall_pool, "Only a single encrypted guest at a time for ucalls.");
+ vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), vm->page_size);
+ hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
+ memset(hdr, 0, sizeof(*hdr));
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ uc = &hdr->ucalls[i];
+ uc->hva = uc;
+ }
+
+ ucall_pool = (struct ucall_header *)vaddr;
+ sync_global_to_guest(vm, ucall_pool);
+
+out:
+ ucall_arch_init(vm, arg);
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+ use_ucall_pool = false;
+ ucall_pool = NULL;
+
+ if (!vm->memcrypt.encrypted) {
+ sync_global_to_guest(vm, use_ucall_pool);
+ sync_global_to_guest(vm, ucall_pool);
+ }
+
+ ucall_arch_uninit(vm);
+}
+
+static struct ucall *ucall_alloc(void)
+{
+ struct ucall *uc = NULL;
+ int i;
+
+ if (!use_ucall_pool)
+ goto out;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) {
+ uc = &ucall_pool->ucalls[i];
+ memset(uc->args, 0, sizeof(uc->args));
+ break;
+ }
+ }
+out:
+ return uc;
+}
+
+static inline size_t uc_pool_idx(struct ucall *uc)
+{
+ return uc->hva - ucall_pool->ucalls;
+}
+
+static void ucall_free(struct ucall *uc)
+{
+ if (!use_ucall_pool)
+ return;
+
+ clear_bit(uc_pool_idx(uc), ucall_pool->in_use);
+}
void ucall(uint64_t cmd, int nargs, ...)
{
- struct ucall uc = {};
+ struct ucall *uc;
+ struct ucall tmp = {};
va_list va;
int i;
- WRITE_ONCE(uc.cmd, cmd);
+ uc = ucall_alloc();
+ if (!uc)
+ uc = &tmp;
+
+ WRITE_ONCE(uc->cmd, cmd);
nargs = min(nargs, UCALL_MAX_ARGS);
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
- WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+ WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
va_end(va);
- ucall_arch_do_ucall((vm_vaddr_t)&uc);
+ /*
+ * When using the ucall pool implementation the @hva member of the ucall
+ * structs in the pool has been initialized to the hva of the ucall
+ * object.
+ */
+ if (use_ucall_pool)
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+ else
+ ucall_arch_do_ucall((vm_vaddr_t)uc);
+
+ ucall_free(uc);
}
uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index ead9946399ab..07c1bc41fa5c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -30,7 +30,11 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
struct kvm_regs regs;
vcpu_regs_get(vcpu, ®s);
- return addr_gva2hva(vcpu->vm, regs.rdi);
+
+ if (vcpu->vm->use_ucall_pool)
+ return (void *)regs.rdi;
+ else
+ return addr_gva2hva(vcpu->vm, regs.rdi);
}
return NULL;
}
--
2.37.1.559.g78731f0fdb-goog
On Wed, Aug 10, 2022 at 08:20:32AM -0700, Peter Gonda wrote:
> To add support for encrypted, SEV, guests in the ucall framework
> introduce a new "ucall pool" implementation. This was suggested in
> the thread on "[RFC PATCH 00/10] KVM: selftests: Add support for
> test-selectable ucall implementations". Using a listed as suggested
s/listed/list/
> there doesn't work well because the list is setup using HVAs not GVAs
> so use a bitmap + array solution instead to get the same pool of ucall
> structs result.
>
> This allows for guests with encryption enabled set up a pool of ucall
to set up
> structs in the guest's shared memory region.
>
> Suggested-by: Sean Christopherson <[email protected]>
> Signed-off-by: Peter Gonda <[email protected]>
> ---
> .../selftests/kvm/include/kvm_util_base.h | 2 +
> .../selftests/kvm/include/ucall_common.h | 13 +--
> .../testing/selftests/kvm/lib/aarch64/ucall.c | 10 +-
> tools/testing/selftests/kvm/lib/riscv/ucall.c | 5 +-
> tools/testing/selftests/kvm/lib/s390x/ucall.c | 5 +-
> .../testing/selftests/kvm/lib/ucall_common.c | 108 +++++++++++++++++-
> .../testing/selftests/kvm/lib/x86_64/ucall.c | 6 +-
> 7 files changed, 131 insertions(+), 18 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
> index 1a84d2d1d85b..baede0d118c5 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util_base.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
> @@ -103,6 +103,8 @@ struct kvm_vm {
> int stats_fd;
> struct kvm_stats_header stats_header;
> struct kvm_stats_desc *stats_desc;
> +
> + bool use_ucall_pool;
> };
>
>
> diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
> index 63bfc60be995..002a22e1cd1d 100644
> --- a/tools/testing/selftests/kvm/include/ucall_common.h
> +++ b/tools/testing/selftests/kvm/include/ucall_common.h
> @@ -22,6 +22,9 @@ enum {
> struct ucall {
> uint64_t cmd;
> uint64_t args[UCALL_MAX_ARGS];
> +
> + /* For ucall pool usage. */
> + struct ucall *hva;
> };
>
> void ucall_arch_init(struct kvm_vm *vm, void *arg);
> @@ -32,15 +35,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
> void ucall(uint64_t cmd, int nargs, ...);
> uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
>
> -static inline void ucall_init(struct kvm_vm *vm, void *arg)
> -{
> - ucall_arch_init(vm, arg);
> -}
> +void ucall_init(struct kvm_vm *vm, void *arg);
>
> -static inline void ucall_uninit(struct kvm_vm *vm)
> -{
> - ucall_arch_uninit(vm);
> -}
> +void ucall_uninit(struct kvm_vm *vm);
>
> #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
> ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
> diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> index 132c0e98bf49..ee70531e8e51 100644
> --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> @@ -81,12 +81,16 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
>
> if (run->exit_reason == KVM_EXIT_MMIO &&
> run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
> - vm_vaddr_t gva;
> + uint64_t ucall_addr;
Why change this vm_vaddr_t to a uint64_t? We shouldn't, because...
>
> TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
> "Unexpected ucall exit mmio address access");
> - memcpy(&gva, run->mmio.data, sizeof(gva));
> - return addr_gva2hva(vcpu->vm, gva);
> + memcpy(&ucall_addr, run->mmio.data, sizeof(ucall_addr));
...here we assume it's a vm_vaddr_t and only...
> +
> + if (vcpu->vm->use_ucall_pool)
> + return (void *)ucall_addr;
...here do we know otherwise. So only here should be any casting.
Also, I think here and in all the ucall_arch_get_ucall() functions we
should add a comment explaining a host-shared address is used, which
is why we don't need addr_gva2hva()
> + else
> + return addr_gva2hva(vcpu->vm, ucall_addr);
> }
>
> return NULL;
> diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
> index 37e091d4366e..4bb5616df29f 100644
> --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
> +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
> @@ -59,7 +59,10 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
> run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
> switch (run->riscv_sbi.function_id) {
> case KVM_RISCV_SELFTESTS_SBI_UCALL:
> - return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]);
> + if (vcpu->vm->use_ucall_pool)
> + return (void *)run->riscv_sbi.args[0];
> + else
> + return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]);
Using vm_vaddr_t gva variable for run->riscv_sbi.args[0] like aarch64 does
for it's address would look a bit nicer.
> case KVM_RISCV_SELFTESTS_SBI_UNEXP:
> vcpu_dump(stderr, vcpu, 2);
> TEST_ASSERT(0, "Unexpected trap taken by guest");
> diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
> index 0f695a031d35..b24c6649877a 100644
> --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
> +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
> @@ -30,7 +30,10 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
> (run->s390_sieic.ipb >> 16) == 0x501) {
> int reg = run->s390_sieic.ipa & 0xf;
>
> - return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]);
> + if (vcpu->vm->use_ucall_pool)
> + return (void *)run->s.regs.gprs[reg];
> + else
> + return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]);
Same comment as for riscv.
> }
> return NULL;
> }
> diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
> index ced480860746..b6502a9420c4 100644
> --- a/tools/testing/selftests/kvm/lib/ucall_common.c
> +++ b/tools/testing/selftests/kvm/lib/ucall_common.c
> @@ -1,22 +1,122 @@
> // SPDX-License-Identifier: GPL-2.0-only
> #include "kvm_util.h"
> +#include "linux/types.h"
> +#include "linux/bitmap.h"
> +#include "linux/atomic.h"
> +
> +struct ucall_header {
> + DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
> + struct ucall ucalls[KVM_MAX_VCPUS];
> +};
> +
> +static bool use_ucall_pool;
I don't think we need this boolean. It will always be true when ucall_pool
is non-null and always false with ucall_pool is null. So we can just test
ucall_pool.
> +static struct ucall_header *ucall_pool;
> +
> +void ucall_init(struct kvm_vm *vm, void *arg)
> +{
> + struct ucall *uc;
> + struct ucall_header *hdr;
> + vm_vaddr_t vaddr;
> + int i;
> +
> + use_ucall_pool = vm->use_ucall_pool;
> + sync_global_to_guest(vm, use_ucall_pool);
> + if (!use_ucall_pool)
> + goto out;
> +
> + TEST_ASSERT(!ucall_pool, "Only a single encrypted guest at a time for ucalls.");
"Only a single VM may use a ucall pool at a time"
> + vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), vm->page_size);
> + hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
> + memset(hdr, 0, sizeof(*hdr));
> +
> + for (i = 0; i < KVM_MAX_VCPUS; ++i) {
> + uc = &hdr->ucalls[i];
> + uc->hva = uc;
> + }
> +
> + ucall_pool = (struct ucall_header *)vaddr;
> + sync_global_to_guest(vm, ucall_pool);
> +
> +out:
> + ucall_arch_init(vm, arg);
> +}
> +
> +void ucall_uninit(struct kvm_vm *vm)
> +{
> + use_ucall_pool = false;
> + ucall_pool = NULL;
> +
> + if (!vm->memcrypt.encrypted) {
Why is this condition here?
> + sync_global_to_guest(vm, use_ucall_pool);
> + sync_global_to_guest(vm, ucall_pool);
> + }
> +
> + ucall_arch_uninit(vm);
> +}
> +
> +static struct ucall *ucall_alloc(void)
> +{
> + struct ucall *uc = NULL;
> + int i;
> +
> + if (!use_ucall_pool)
> + goto out;
> +
> + for (i = 0; i < KVM_MAX_VCPUS; ++i) {
> + if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) {
> + uc = &ucall_pool->ucalls[i];
> + memset(uc->args, 0, sizeof(uc->args));
Do we need to zero uc? If so, what about uc->cmd?
> + break;
> + }
> + }
Need a blank line here
> +out:
> + return uc;
> +}
> +
> +static inline size_t uc_pool_idx(struct ucall *uc)
> +{
> + return uc->hva - ucall_pool->ucalls;
> +}
> +
> +static void ucall_free(struct ucall *uc)
> +{
> + if (!use_ucall_pool)
> + return;
> +
> + clear_bit(uc_pool_idx(uc), ucall_pool->in_use);
> +}
>
> void ucall(uint64_t cmd, int nargs, ...)
> {
> - struct ucall uc = {};
> + struct ucall *uc;
> + struct ucall tmp = {};
> va_list va;
> int i;
>
> - WRITE_ONCE(uc.cmd, cmd);
> + uc = ucall_alloc();
> + if (!uc)
> + uc = &tmp;
> +
> + WRITE_ONCE(uc->cmd, cmd);
>
> nargs = min(nargs, UCALL_MAX_ARGS);
>
> va_start(va, nargs);
> for (i = 0; i < nargs; ++i)
> - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
> + WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
> va_end(va);
>
> - ucall_arch_do_ucall((vm_vaddr_t)&uc);
> + /*
> + * When using the ucall pool implementation the @hva member of the ucall
> + * structs in the pool has been initialized to the hva of the ucall
> + * object.
> + */
> + if (use_ucall_pool)
> + ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
> + else
> + ucall_arch_do_ucall((vm_vaddr_t)uc);
> +
> + ucall_free(uc);
> }
>
> uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
> index ead9946399ab..07c1bc41fa5c 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
> @@ -30,7 +30,11 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
> struct kvm_regs regs;
>
> vcpu_regs_get(vcpu, ®s);
> - return addr_gva2hva(vcpu->vm, regs.rdi);
> +
> + if (vcpu->vm->use_ucall_pool)
> + return (void *)regs.rdi;
> + else
> + return addr_gva2hva(vcpu->vm, regs.rdi);
> }
> return NULL;
> }
> --
> 2.37.1.559.g78731f0fdb-goog
>
Thanks,
drew
On Tue, Aug 16, 2022, Andrew Jones wrote:
> On Wed, Aug 10, 2022 at 08:20:32AM -0700, Peter Gonda wrote:
> > diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> > index 132c0e98bf49..ee70531e8e51 100644
> > --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> > +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> > @@ -81,12 +81,16 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
> >
> > if (run->exit_reason == KVM_EXIT_MMIO &&
> > run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
> > - vm_vaddr_t gva;
> > + uint64_t ucall_addr;
>
> Why change this vm_vaddr_t to a uint64_t? We shouldn't, because...
>
> >
> > TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
> > "Unexpected ucall exit mmio address access");
> > - memcpy(&gva, run->mmio.data, sizeof(gva));
> > - return addr_gva2hva(vcpu->vm, gva);
> > + memcpy(&ucall_addr, run->mmio.data, sizeof(ucall_addr));
>
> ...here we assume it's a vm_vaddr_t and only...
>
> > +
> > + if (vcpu->vm->use_ucall_pool)
> > + return (void *)ucall_addr;
>
> ...here do we know otherwise. So only here should be any casting.
It technically should be a union, because if sizeof(vm_vaddr_t) < sizeof(void *)
then declaring it as a vm_addr_t will do the wrong thing. But then it's possible
that this could read too many bytes and inducs failure. So I guess what we really
need is a "static_assert(sizeof(vm_vaddr_t) == sizeof(void *))".
But why is "use_ucall_pool" optional? Unless there's a use case that fundamentally
conflicts with the pool approach, let's make the pool approach the _only_ approach.
IIRC, ARM's implementation isn't thread safe, i.e. there's at least one other use
case that _needs_ the pool implementation.
By supporting both, we are signing ourselves up for extra maintenance and pain,
e.g. inevitably we'll break whichever option isn't the default and not notice for
quite some time.
On Thu, Aug 18, 2022 at 04:00:40PM +0000, Sean Christopherson wrote:
> On Tue, Aug 16, 2022, Andrew Jones wrote:
> > On Wed, Aug 10, 2022 at 08:20:32AM -0700, Peter Gonda wrote:
> > > diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> > > index 132c0e98bf49..ee70531e8e51 100644
> > > --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> > > +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
> > > @@ -81,12 +81,16 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
> > >
> > > if (run->exit_reason == KVM_EXIT_MMIO &&
> > > run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
> > > - vm_vaddr_t gva;
> > > + uint64_t ucall_addr;
> >
> > Why change this vm_vaddr_t to a uint64_t? We shouldn't, because...
> >
> > >
> > > TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
> > > "Unexpected ucall exit mmio address access");
> > > - memcpy(&gva, run->mmio.data, sizeof(gva));
> > > - return addr_gva2hva(vcpu->vm, gva);
> > > + memcpy(&ucall_addr, run->mmio.data, sizeof(ucall_addr));
> >
> > ...here we assume it's a vm_vaddr_t and only...
> >
> > > +
> > > + if (vcpu->vm->use_ucall_pool)
> > > + return (void *)ucall_addr;
> >
> > ...here do we know otherwise. So only here should be any casting.
>
> It technically should be a union, because if sizeof(vm_vaddr_t) < sizeof(void *)
> then declaring it as a vm_addr_t will do the wrong thing. But then it's possible
> that this could read too many bytes and inducs failure. So I guess what we really
> need is a "static_assert(sizeof(vm_vaddr_t) == sizeof(void *))".
ack
>
> But why is "use_ucall_pool" optional? Unless there's a use case that fundamentally
> conflicts with the pool approach, let's make the pool approach the _only_ approach.
> IIRC, ARM's implementation isn't thread safe, i.e. there's at least one other use
> case that _needs_ the pool implementation.
Really? The ucall structure is on the vcpu's stack like the other
architectures. Ah, you're probably thinking about the shared address used
to exit to userspace. The address doesn't matter as long as no VM maps
it, but, yes, a multi-VM test where the VMs have different maps could end
up breaking ucalls for one or more VMs. It wouldn't be hard to make that
address per-VM, though, if ever necessary.
>
> By supporting both, we are signing ourselves up for extra maintenance and pain,
> e.g. inevitably we'll break whichever option isn't the default and not notice for
> quite some time.
uc pools are currently limited to a single VM. That could be changed, but
at the expense of even more code to maintain. The simple uc implementation
is, well, simple, and also supports multiple VMs. I'd prefer we keep that
one and keep it as the default.
Thanks,
drew
On Thu, Aug 18, 2022, Andrew Jones wrote:
> On Thu, Aug 18, 2022 at 04:00:40PM +0000, Sean Christopherson wrote:
> > But why is "use_ucall_pool" optional? Unless there's a use case that fundamentally
> > conflicts with the pool approach, let's make the pool approach the _only_ approach.
> > IIRC, ARM's implementation isn't thread safe, i.e. there's at least one other use
> > case that _needs_ the pool implementation.
>
> Really? The ucall structure is on the vcpu's stack like the other
> architectures. Ah, you're probably thinking about the shared address used
> to exit to userspace. The address doesn't matter as long as no VM maps
> it, but, yes, a multi-VM test where the VMs have different maps could end
> up breaking ucalls for one or more VMs.
Yah, that's what I was thinking of.
> It wouldn't be hard to make that address per-VM, though, if ever necessary.
>
> >
> > By supporting both, we are signing ourselves up for extra maintenance and pain,
> > e.g. inevitably we'll break whichever option isn't the default and not notice for
> > quite some time.
>
> uc pools are currently limited to a single VM. That could be changed, but
> at the expense of even more code to maintain.
Unless I'm missing something, it's actually less code. "globals" that are sync'd
to the guest aren't truly global, each VM has a separate physical page that is a
copy of the global, hence the need for sync_global_to_guest().
And FWIW, the code is actually "safe" in practice because I doubt any test spawns
multiple VMs in parallel, i.e. the host might get confused over the value of
ucall_pool, but guests won't stomp on each other so long as they're created
serially. Ditto for ARM's ucall_exit_mmio_addr.
To make this truly thread safe, we just need a way to atomically sync the pointer
to the guest, and that's easy enough to add.
With that out of the way, all of the conditional "use pool" code goes away, which
IMO simplifies things overall. Using a pool versus stack memory isn't _that_ much
more complicated, and we _know_ the stack approach doesn't work for all VM types.
Indeed, this partial conversion is:
7 files changed, 131 insertions(+), 18 deletions(-)
versus a full conversion:
6 files changed, 89 insertions(+), 20 deletions(-)
And simplification is only a secondary concern, what I'm really worried about is
things bitrotting and then some poor sod having to wade through unrelated issues
because somebody else broke the pool implementation and didn't notice.
Argh, case in point, none of the x86 (or s390 or RISC-V) tests do ucall_init(),
which would have been a lurking bug if any of them tried to switch to the pool.
Argh + case in point #2, this code is broken, and that bug may have sat unnoticed
due to only the esoteric SEV test using the pool.
-static inline size_t uc_pool_idx(struct ucall *uc)
+static noinline void ucall_free(struct ucall *uc)
{
- return uc->hva - ucall_pool->ucalls;
-}
-
-static void ucall_free(struct ucall *uc)
-{
- clear_bit(uc_pool_idx(uc), ucall_pool->in_use);
+ /* Beware, here be pointer arithmetic. */
+ clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
}
So my very strong vote is to fully switch to a single implementation. That way
our code either works or it doesn't, i.e. we notice very quickly if it breaks.
Peter, if I can convince Drew of One Pool To Rule Them All, can you slot in the
attached patches and slightly rework the ordering so that all SEV patches are at
the end? E.g. something like:
KVM: selftests: Automatically do init_ucall() for non-barebones VMs
KVM: selftests: move vm_phy_pages_alloc() earlier in file
KVM: selftests: sparsebit: add const where appropriate
KVM: selftests: add hooks for managing encrypted guest memory
KVM: selftests: handle encryption bits in page tables
KVM: selftests: add support for encrypted vm_vaddr_* allocations
KVM: selftests: Consolidate common code for popuplating
KVM: selftests: Consolidate boilerplate code in get_ucall()
tools: Add atomic_test_and_set_bit()
KVM: selftests: Add macro to atomically sync per-VM "global" pointers
KVM: selftests: Add ucall pool based implementation
KVM: selftests: add library for creating/interacting with SEV guests
KVM: selftests: Add simple sev vm testing
The patches are tested on x86 and compile tested on arm. I can provide more testing
if/when necessary. I also haven't addressed any other feedback in the "ucall pool"
patch, though I'm guessing much of it no longer applies.
On Thu, Aug 18, 2022 at 11:29:11PM +0000, Sean Christopherson wrote:
> On Thu, Aug 18, 2022, Andrew Jones wrote:
> > On Thu, Aug 18, 2022 at 04:00:40PM +0000, Sean Christopherson wrote:
> > > But why is "use_ucall_pool" optional? Unless there's a use case that fundamentally
> > > conflicts with the pool approach, let's make the pool approach the _only_ approach.
> > > IIRC, ARM's implementation isn't thread safe, i.e. there's at least one other use
> > > case that _needs_ the pool implementation.
> >
> > Really? The ucall structure is on the vcpu's stack like the other
> > architectures. Ah, you're probably thinking about the shared address used
> > to exit to userspace. The address doesn't matter as long as no VM maps
> > it, but, yes, a multi-VM test where the VMs have different maps could end
> > up breaking ucalls for one or more VMs.
>
> Yah, that's what I was thinking of.
>
> > It wouldn't be hard to make that address per-VM, though, if ever necessary.
> >
> > >
> > > By supporting both, we are signing ourselves up for extra maintenance and pain,
> > > e.g. inevitably we'll break whichever option isn't the default and not notice for
> > > quite some time.
> >
> > uc pools are currently limited to a single VM. That could be changed, but
> > at the expense of even more code to maintain.
>
> Unless I'm missing something, it's actually less code. "globals" that are sync'd
> to the guest aren't truly global, each VM has a separate physical page that is a
> copy of the global, hence the need for sync_global_to_guest().
>
> And FWIW, the code is actually "safe" in practice because I doubt any test spawns
> multiple VMs in parallel, i.e. the host might get confused over the value of
> ucall_pool, but guests won't stomp on each other so long as they're created
> serially. Ditto for ARM's ucall_exit_mmio_addr.
>
> To make this truly thread safe, we just need a way to atomically sync the pointer
> to the guest, and that's easy enough to add.
I like that.
>
> With that out of the way, all of the conditional "use pool" code goes away, which
> IMO simplifies things overall. Using a pool versus stack memory isn't _that_ much
> more complicated, and we _know_ the stack approach doesn't work for all VM types.
>
> Indeed, this partial conversion is:
>
> 7 files changed, 131 insertions(+), 18 deletions(-)
>
> versus a full conversion:
>
> 6 files changed, 89 insertions(+), 20 deletions(-)
>
> And simplification is only a secondary concern, what I'm really worried about is
> things bitrotting and then some poor sod having to wade through unrelated issues
> because somebody else broke the pool implementation and didn't notice.
>
> Argh, case in point, none of the x86 (or s390 or RISC-V) tests do ucall_init(),
> which would have been a lurking bug if any of them tried to switch to the pool.
>
> Argh + case in point #2, this code is broken, and that bug may have sat unnoticed
> due to only the esoteric SEV test using the pool.
>
> -static inline size_t uc_pool_idx(struct ucall *uc)
> +static noinline void ucall_free(struct ucall *uc)
> {
> - return uc->hva - ucall_pool->ucalls;
> -}
> -
> -static void ucall_free(struct ucall *uc)
> -{
> - clear_bit(uc_pool_idx(uc), ucall_pool->in_use);
> + /* Beware, here be pointer arithmetic. */
> + clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
> }
Dropping the only once-used uc_pool_idx() and adding the comment looks
better, but I don't think there was a bug because uc == uc->hva.
>
>
> So my very strong vote is to fully switch to a single implementation. That way
> our code either works or it doesn't, i.e. we notice very quickly if it breaks.
>
> Peter, if I can convince Drew of One Pool To Rule Them All, can you slot in the
There are other hills for me to stand on, so I won't insist on this one.
> attached patches and slightly rework the ordering so that all SEV patches are at
> the end? E.g. something like:
>
> KVM: selftests: Automatically do init_ucall() for non-barebones VMs
> KVM: selftests: move vm_phy_pages_alloc() earlier in file
> KVM: selftests: sparsebit: add const where appropriate
> KVM: selftests: add hooks for managing encrypted guest memory
> KVM: selftests: handle encryption bits in page tables
> KVM: selftests: add support for encrypted vm_vaddr_* allocations
> KVM: selftests: Consolidate common code for popuplating
> KVM: selftests: Consolidate boilerplate code in get_ucall()
> tools: Add atomic_test_and_set_bit()
> KVM: selftests: Add macro to atomically sync per-VM "global" pointers
> KVM: selftests: Add ucall pool based implementation
> KVM: selftests: add library for creating/interacting with SEV guests
> KVM: selftests: Add simple sev vm testing
>
> The patches are tested on x86 and compile tested on arm. I can provide more testing
> if/when necessary. I also haven't addressed any other feedback in the "ucall pool"
> patch, though I'm guessing much of it no longer applies.
I skimmed the patches but don't know how to comment on attachments, so
I'll add the two things that popped to mind here.
1) Doing ucall_init() at VM create time may be too early for Arm. Arm
probes for an unmapped address for the MMIO address it needs, so it's
best to do that after all mapping.
2) We'll need to change the sanity checks in Arm's get_ucall() to not
check that the MMIO address == ucall_exit_mmio_addr since there's no
guarantee that the exiting guest's address matches whatever is lingering
in the host's version. We can either drop the sanity check altogether
or try to come up with something else.
Thanks,
drew
On Fri, Aug 19, 2022, Andrew Jones wrote:
> On Thu, Aug 18, 2022 at 11:29:11PM +0000, Sean Christopherson wrote:
> > On Thu, Aug 18, 2022, Andrew Jones wrote:
> Dropping the only once-used uc_pool_idx() and adding the comment looks
> better, but I don't think there was a bug because uc == uc->hva.
uc == uc->hva for the host, but not for the guest. From the guest's perspective,
"hva" is an opaque pointer that has no direct relation to "uc".
> 1) Doing ucall_init() at VM create time may be too early for Arm. Arm
> probes for an unmapped address for the MMIO address it needs, so it's
> best to do that after all mapping.
Argh. I really hate the MMIO probing. Is there really no arbitrary address that
selftests can carve out and simply say "thou shalt not create a memslot here"?
Or can we just say that it's always immediate after memslot0? That would allow
us to delete the searching code in ARM's ucall_arch_init().
> 2) We'll need to change the sanity checks in Arm's get_ucall() to not
> check that the MMIO address == ucall_exit_mmio_addr since there's no
> guarantee that the exiting guest's address matches whatever is lingering
> in the host's version. We can either drop the sanity check altogether
> or try to come up with something else.
Ah, right. This one at least is easy to handle. If the address is per-VM, just
track the address. If it's hardcoded (as a fix for #1) then, it's even simpler.
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 8623c1568f97..74167540815b 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -94,7 +94,8 @@ struct kvm_vm {
vm_paddr_t pgd;
vm_vaddr_t gdt;
vm_vaddr_t tss;
- vm_vaddr_t idt;
+ vm_paddr_t pgd;
+ vm_vaddr_t ucall_addr;
vm_vaddr_t handlers;
uint32_t dirty_ring_size;
struct vm_memcrypt memcrypt;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index b5d904f074b6..7f1d50dab0df 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -14,6 +14,8 @@ static vm_vaddr_t *ucall_exit_mmio_addr;
static void ucall_set_mmio_addr(struct kvm_vm *vm, vm_vaddr_t val)
{
+ vm->ucall_addr = val;
+
atomic_sync_global_pointer_to_guest(vm, ucall_exit_mmio_addr, val);
}
@@ -87,7 +89,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_MMIO &&
- run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+ run->mmio.phys_addr == vcpu->vm->ucall_addr) {
TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
"Unexpected ucall exit mmio address access");
return (void *)(*((uint64_t *)run->mmio.data));
On Fri, Aug 19, 2022, Vishal Annapurve wrote:
> On Wed, Aug 10, 2022 at 8:20 AM Peter Gonda <[email protected]> wrote:
> > void ucall(uint64_t cmd, int nargs, ...)
> > {
> > - struct ucall uc = {};
> > + struct ucall *uc;
> > + struct ucall tmp = {};
>
> This steps seems to result in generating instructions that need SSE
> support on x86:
> struct ucall tmp = {};
> movaps %xmm0,0x20(%rsp)
> movaps %xmm0,0x30(%rsp)
> movaps %xmm0,0x40(%rsp)
> movaps %xmm0,0x50(%rsp)
>
> This initialization will need proper compilation flags to generate
> instructions according to VM configuration.
Can you be more specific as to why generating SSE instructiions is problematic?
The compiler emitting fancy instructions for struct initialization is not out of
the ordinary.
On Wed, Aug 10, 2022 at 8:20 AM Peter Gonda <[email protected]> wrote:
> ...
> +
> +static struct ucall *ucall_alloc(void)
> +{
> + struct ucall *uc = NULL;
> + int i;
> +
> + if (!use_ucall_pool)
> + goto out;
> +
> + for (i = 0; i < KVM_MAX_VCPUS; ++i) {
> + if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) {
> + uc = &ucall_pool->ucalls[i];
> + memset(uc->args, 0, sizeof(uc->args));
> + break;
> + }
> + }
> +out:
> + return uc;
> +}
> +
> +static inline size_t uc_pool_idx(struct ucall *uc)
> +{
> + return uc->hva - ucall_pool->ucalls;
> +}
> +
> +static void ucall_free(struct ucall *uc)
> +{
> + if (!use_ucall_pool)
> + return;
> +
> + clear_bit(uc_pool_idx(uc), ucall_pool->in_use);
> +}
>
> void ucall(uint64_t cmd, int nargs, ...)
> {
> - struct ucall uc = {};
> + struct ucall *uc;
> + struct ucall tmp = {};
This steps seems to result in generating instructions that need SSE
support on x86:
struct ucall tmp = {};
movaps %xmm0,0x20(%rsp)
movaps %xmm0,0x30(%rsp)
movaps %xmm0,0x40(%rsp)
movaps %xmm0,0x50(%rsp)
This initialization will need proper compilation flags to generate
instructions according to VM configuration.
> va_list va;
> int i;
>
> - WRITE_ONCE(uc.cmd, cmd);
> + uc = ucall_alloc();
> + if (!uc)
> + uc = &tmp;
> +
> + WRITE_ONCE(uc->cmd, cmd);
>
> nargs = min(nargs, UCALL_MAX_ARGS);
>
> va_start(va, nargs);
> for (i = 0; i < nargs; ++i)
> - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
> + WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
> va_end(va);
>
> - ucall_arch_do_ucall((vm_vaddr_t)&uc);
> ...
On Fri, Aug 19, 2022, Sean Christopherson wrote:
> Or can we just say that it's always immediate after memslot0? That would allow
> us to delete the searching code in ARM's ucall_arch_init().
I have this coded up, will test on x86 and arm64 and send out a series (essentially
all of the non-SEV bits in this series).
Prescribing an MMIO address from __vm_create() has a some nice side effects.
1) KVM treats writes to read-only memslots as MMIO, so a future cleanup would
be to have __vm_create() create a memslot for the MMIO range to prevent
silently clobbering the address. I'll leave this for later because selftests
currently assumes they can use all memslots except memslot0.
2) It will simplify wwitching x86 and RISC-V to a common MMIO implementation,
if we ever want to do that. I.e. have common code for everything except s390.
On Fri, Aug 19, 2022 at 12:37 PM Sean Christopherson <[email protected]> wrote:
>
> On Fri, Aug 19, 2022, Vishal Annapurve wrote:
> > On Wed, Aug 10, 2022 at 8:20 AM Peter Gonda <[email protected]> wrote:
> > > void ucall(uint64_t cmd, int nargs, ...)
> > > {
> > > - struct ucall uc = {};
> > > + struct ucall *uc;
> > > + struct ucall tmp = {};
> >
> > This steps seems to result in generating instructions that need SSE
> > support on x86:
> > struct ucall tmp = {};
> > movaps %xmm0,0x20(%rsp)
> > movaps %xmm0,0x30(%rsp)
> > movaps %xmm0,0x40(%rsp)
> > movaps %xmm0,0x50(%rsp)
> >
> > This initialization will need proper compilation flags to generate
> > instructions according to VM configuration.
>
> Can you be more specific as to why generating SSE instructiions is problematic?
> The compiler emitting fancy instructions for struct initialization is not out of
> the ordinary.
When executing these instructions, I was hitting #GP in the guest VM.
CR4/CPUID settings seem to allow a VM to execute instructions that
need SSE support enabled (which I was doubting earlier). After some
more digging, it looks like the compiler is not able to ensure that
operands for such instructions are 16 byte aligned as required by the
hardware. There is a bug discussing this same issue:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40838.
Adding -mstackrealign seems to help avoid running into #GP and things
seem to work normally. Though I am not sure if SSE instruction
execution is reliable from guest VM within selftests and so was
suggesting usage of "-mno-sse" or equivalent flags.