Date:   Thu, 2 Feb 2023 17:08:36 +0200
From:   Zhi Wang <zhi.wang.linux@gmail.com>
To:     Kechen Lu <kechenl@nvidia.com>
Cc:     <kvm@vger.kernel.org>, <seanjc@google.com>, <pbonzini@redhat.com>,
        <chao.gao@intel.com>, <shaoqin.huang@intel.com>,
        <vkuznets@redhat.com>, <linux-kernel@vger.kernel.org>
Subject: Re: [RFC PATCH v6 6/6] KVM: selftests: Add tests for VM and vCPU
 cap KVM_CAP_X86_DISABLE_EXITS
Message-ID: <20230202170836.000078f1@gmail.com>
In-Reply-To: <20230121020738.2973-7-kechenl@nvidia.com>
References: <20230121020738.2973-1-kechenl@nvidia.com>
        <20230121020738.2973-7-kechenl@nvidia.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Precedence: bulk

On Sat, 21 Jan 2023 02:07:38 +0000
Kechen Lu <kechenl@nvidia.com> wrote:

It works on my box now. LGTM. I am curious whether any other userspace
applications use this capability. It would be interesting to see their
strategies.

> Add selftests verifying that the KVM_CAP_X86_DISABLE_EXITS override flags
> work as expected in both VM scope and vCPU scope.
> 
> Suggested-by: Chao Gao <chao.gao@intel.com>
> Suggested-by: Shaoqin Huang <shaoqin.huang@intel.com>
> Suggested-by: Zhi Wang <zhi.wang.linux@gmail.com>
> Signed-off-by: Kechen Lu <kechenl@nvidia.com>
> ---
>  tools/testing/selftests/kvm/Makefile          |   1 +
>  .../selftests/kvm/x86_64/disable_exits_test.c | 363 ++++++++++++++++++
>  2 files changed, 364 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/x86_64/disable_exits_test.c
> 
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index 1750f91dd936..eeeba35e2536 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -114,6 +114,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
>  TEST_GEN_PROGS_x86_64 += x86_64/amx_test
>  TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
>  TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
> +TEST_GEN_PROGS_x86_64 += x86_64/disable_exits_test
>  TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
>  TEST_GEN_PROGS_x86_64 += demand_paging_test
>  TEST_GEN_PROGS_x86_64 += dirty_log_test
> diff --git a/tools/testing/selftests/kvm/x86_64/disable_exits_test.c b/tools/testing/selftests/kvm/x86_64/disable_exits_test.c
> new file mode 100644
> index 000000000000..74a2152b35dd
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/disable_exits_test.c
> @@ -0,0 +1,363 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Test per-VM and per-vCPU disable exits cap
> + * 1) Per-VM scope
> + * 2) Per-vCPU scope
> + *
> + */
> +
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <pthread.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <time.h>
> +#include <sys/ioctl.h>
> +
> +#include "test_util.h"
> +#include "kvm_util.h"
> +#include "svm_util.h"
> +#include "vmx.h"
> +#include "processor.h"
> +#include "asm/kvm.h"
> +#include "linux/kvm.h"
> +
> +/* Arbitrarily chosen vector of the IPI sent from sender to halter vCPU */
> +#define IPI_VECTOR	 0xa5
> +/* Seconds both vCPU threads are left running before being cancelled */
> +#define LOOP_DURATION	 3
> +
> +/*
> + * Counters shared between the host test and both guest vCPUs through one
> + * guest page; the host zeroes the structure before the vCPUs run.
> + */
> +struct guest_stats {
> +	/* APIC ID of the halter vCPU, published by halter_guest_code() */
> +	uint32_t halter_apic_id;
> +	/* Incremented by the halter right before each HLT */
> +	volatile uint64_t hlt_count;
> +	/* Incremented by the halter after each HLT returns */
> +	volatile uint64_t wake_count;
> +};
> +
> +/*
> + * Read the current value of the "halt_exits" statistic of @vcpu through the
> + * KVM binary stats file descriptor.
> + *
> + * Returns the first data element of the stat, or 0 when the stats file has
> + * no descriptors or none of them is named "halt_exits".
> + */
> +static u64 read_vcpu_stats_halt_exits(struct kvm_vcpu *vcpu)
> +{
> +	int i;
> +	struct kvm_stats_header header;
> +	u64 *stats_data;
> +	u64 ret = 0;
> +	struct kvm_stats_desc *stats_desc;
> +	struct kvm_stats_desc *pdesc;
> +	int stats_fd = vcpu_get_stats_fd(vcpu);
> +
> +	read_stats_header(stats_fd, &header);
> +	if (header.num_desc == 0) {
> +		fprintf(stderr,
> +			"Cannot read halt exits since no KVM stats defined\n");
> +		goto out_close;
> +	}
> +
> +	stats_desc = read_stats_descriptors(stats_fd, &header);
> +	for (i = 0; i < header.num_desc; ++i) {
> +		pdesc = get_stats_descriptor(stats_desc, i, &header);
> +		/*
> +		 * Compare the whole name; a length-limited compare would also
> +		 * accept any other stat that merely starts with "halt_exits".
> +		 */
> +		if (strcmp(pdesc->name, "halt_exits"))
> +			continue;
> +
> +		TEST_ASSERT(pdesc->size > 0,
> +			"halt_exits stat has no data elements");
> +		stats_data = calloc(pdesc->size, sizeof(*stats_data));
> +		TEST_ASSERT(stats_data,
> +			"Failed to allocate buffer for halt_exits stat");
> +		read_stat_data(stats_fd, &header, pdesc, stats_data,
> +			pdesc->size);
> +		ret = stats_data[0];
> +		free(stats_data);
> +		break;
> +	}
> +	free(stats_desc);
> +
> +out_close:
> +	/*
> +	 * Release the descriptor obtained above so that repeated calls do not
> +	 * accumulate open stats fds.
> +	 * NOTE(review): assumes vcpu_get_stats_fd() hands out a fresh fd on
> +	 * every call (no caching inside the vCPU) - confirm.
> +	 */
> +	close(stats_fd);
> +	return ret;
> +}
> +
> +/*
> + * Guest code of the halter vCPU: enable the xAPIC, publish this vCPU's APIC
> + * ID in @data, then execute HLT in an endless loop while counting every HLT
> + * issued (hlt_count) and every return from HLT (wake_count).
> + */
> +static void halter_guest_code(struct guest_stats *data)
> +{
> +	xapic_enable();
> +	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
> +
> +	for (;;) {
> +		data->hlt_count++;
> +		/*
> +		 * Interrupts are enabled only around the HLT itself and are
> +		 * disabled again as soon as the vCPU has been woken up.
> +		 */
> +		asm volatile("sti; hlt; cli");
> +		data->wake_count++;
> +	}
> +}
> +
> +/*
> + * Handler installed for IPI_VECTOR; runs on the halter vCPU when the IPI
> + * arrives. It does nothing but write the local APIC's EOI register.
> + */
> +static void guest_ipi_handler(struct ex_regs *regs)
> +{
> +	/*
> +	 * NOTE(review): the value written (11) looks arbitrary; writing 0 is
> +	 * the usual convention for EOI - confirm it is intentional.
> +	 */
> +	xapic_write_reg(APIC_EOI, 11);
> +}
> +
> +/*
> + * Busy-wait on the sender vCPU until the halter made progress, i.e. until
> + * both @data->wake_count and @data->hlt_count differ from the values passed
> + * in, or until 4000000000 TSC ticks have elapsed. Afterwards assert in the
> + * guest that both counters did change.
> + */
> +static void sender_wait_loop(struct guest_stats *data, uint64_t old_hlt_count,
> +		uint64_t old_wake_count)
> +{
> +	const uint64_t begin = rdtsc();
> +
> +	for (;;) {
> +		/* Give up once the TSC budget is used up */
> +		if (rdtsc() - begin >= 4000000000)
> +			break;
> +
> +		/* Keep spinning while either counter is still unchanged */
> +		if (data->wake_count == old_wake_count ||
> +		    data->hlt_count == old_hlt_count)
> +			continue;
> +
> +		break;
> +	}
> +
> +	GUEST_ASSERT((data->wake_count != old_wake_count) &&
> +			(data->hlt_count != old_hlt_count));
> +}
> +
> +/*
> + * Guest code of the sender vCPU: in an endless loop, send an IPI with vector
> + * IPI_VECTOR to the halter vCPU and then wait, via sender_wait_loop(), until
> + * the halter's counters have advanced.
> + */
> +static void sender_guest_code(struct guest_stats *data)
> +{
> +	uint32_t icr_val;
> +	uint32_t icr2_val;
> +	uint64_t old_hlt_count = 0;
> +	uint64_t old_wake_count = 0;
> +
> +	xapic_enable();
> +	/* Prepare the interrupt command register values used for every IPI */
> +	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
> +	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
> +
> +	for (;;) {
> +		/*
> +		 * Send the IPI to the halted vCPU. The first IPI can be sent
> +		 * right away because the host already waited for the halter
> +		 * before it created the sender vCPU thread.
> +		 */
> +		xapic_write_reg(APIC_ICR2, icr2_val);
> +		xapic_write_reg(APIC_ICR, icr_val);
> +		sender_wait_loop(data, old_hlt_count, old_wake_count);
> +		/* Remember the counters so the next round detects new progress */
> +		old_wake_count = data->wake_count;
> +		old_hlt_count = data->hlt_count;
> +	}
> +}
> +
> +/*
> + * pthread entry point that runs one vCPU. @arg is the struct kvm_vcpu to
> + * run. The thread switches itself to asynchronous cancellation before
> + * entering vcpu_run(). Always returns NULL.
> + */
> +static void *vcpu_thread(void *arg)
> +{
> +	struct kvm_vcpu *vcpu = arg;
> +	int prev_type;
> +	int r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &prev_type);
> +
> +	TEST_ASSERT(r == 0,
> +		"pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
> +		vcpu->id, r);
> +
> +	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
> +	vcpu_run(vcpu);
> +
> +	return NULL;
> +}
> +
> +/*
> + * Cancel the thread running @vcpu and wait until it has terminated.
> + * @thread: thread previously created with vcpu_thread() as start routine
> + * @vcpu:   vCPU run by that thread, only used for diagnostics
> + *
> + * Both pthread calls report failure through their return value, which is
> + * what the assertion messages print.
> + */
> +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
> +{
> +	int r;
> +
> +	r = pthread_cancel(thread);
> +	TEST_ASSERT(r == 0,
> +		"pthread_cancel on vcpu_id=%u failed with errno=%d",
> +		vcpu->id, r);
> +
> +	/* The thread's exit value is of no interest, so do not collect it */
> +	r = pthread_join(thread, NULL);
> +	TEST_ASSERT(r == 0,
> +		"pthread_join on vcpu_id=%u failed with errno=%d",
> +		vcpu->id, r);
> +}
> +
> +/*
> + * Run @halter_vcpu and @sender_vcpu in two host threads: start the halter,
> + * give it one second to reach its first HLT, start the sender, let both run
> + * for LOOP_DURATION seconds and finally cancel and join both threads.
> + */
> +static void vm_run_with_threads(struct kvm_vcpu *halter_vcpu,
> +		struct kvm_vcpu *sender_vcpu)
> +{
> +	int r;
> +	pthread_t threads[2];
> +
> +	/*
> +	 * Start halter vCPU thread and wait for it to execute first HLT.
> +	 * pthread_create() returns the error number instead of setting
> +	 * errno, so report the return value.
> +	 */
> +	r = pthread_create(&threads[0], NULL, vcpu_thread, halter_vcpu);
> +	TEST_ASSERT(r == 0,
> +		"pthread_create halter failed errno=%d", r);
> +	fprintf(stderr, "Halter vCPU thread started\n");
> +
> +	sleep(1);
> +
> +	/*
> +	 * After guest halter vCPU executed first HLT, start the sender
> +	 * vCPU thread to wakeup halter vCPU
> +	 */
> +	r = pthread_create(&threads[1], NULL, vcpu_thread, sender_vcpu);
> +	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", r);
> +
> +	sleep(LOOP_DURATION);
> +
> +	cancel_join_vcpu_thread(threads[0], halter_vcpu);
> +	cancel_join_vcpu_thread(threads[1], sender_vcpu);
> +}
> +
> +/*
> + * Test cases 1 and 2, selected by @cap_enabled.
> + *
> + * false: plain VM. One vCPU keeps executing HLT while another one wakes it
> + *        up with IPIs; the halt_exits stat of the halter must equal the
> + *        number of HLTs the guest recorded.
> + * true:  KVM_X86_DISABLE_EXITS_HLT is enabled VM-wide before any vCPU is
> + *        added; the halt_exits stat of the halter must stay 0.
> + */
> +static void test_vm_disable_exits_cap(bool cap_enabled)
> +{
> +	struct kvm_vcpu *halter_vcpu, *sender_vcpu;
> +	vm_vaddr_t guest_stats_page_vaddr;
> +	struct guest_stats *data;
> +	uint64_t kvm_halt_exits;
> +	struct kvm_vm *vm = vm_create(2);
> +
> +	/* The VM-scoped cap is enabled while the VM still has no vCPUs */
> +	if (cap_enabled) {
> +		vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
> +			KVM_X86_DISABLE_EXITS_HLT);
> +	}
> +
> +	/* Halter vCPU plus the handler for the wake-up IPI */
> +	halter_vcpu = vm_vcpu_add(vm, 0, halter_guest_code);
> +	vm_init_descriptor_tables(vm);
> +	vcpu_init_descriptor_tables(halter_vcpu);
> +	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
> +	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
> +
> +	/* Sender vCPU that wakes the halter up */
> +	sender_vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
> +
> +	/* One zeroed guest page carries the counters shared with the host */
> +	guest_stats_page_vaddr = vm_vaddr_alloc_page(vm);
> +	data = addr_gva2hva(vm, guest_stats_page_vaddr);
> +	memset(data, 0, sizeof(*data));
> +
> +	vcpu_args_set(halter_vcpu, 1, guest_stats_page_vaddr);
> +	vcpu_args_set(sender_vcpu, 1, guest_stats_page_vaddr);
> +
> +	vm_run_with_threads(halter_vcpu, sender_vcpu);
> +
> +	kvm_halt_exits = read_vcpu_stats_halt_exits(halter_vcpu);
> +	if (cap_enabled) {
> +		TEST_ASSERT(kvm_halt_exits == 0,
> +		"Halter vCPU had unexpected halt exits occurring after disabling VM-scoped halt exits cap\n");
> +	} else {
> +		TEST_ASSERT(kvm_halt_exits == data->hlt_count,
> +		"Halter vCPU had unmatched %lu halt exits - %lu HLTs executed, when not disabling VM halt exits\n",
> +		kvm_halt_exits, data->hlt_count);
> +	}
> +
> +	fprintf(stderr, "Halter vCPU had %lu halt exits\n", kvm_halt_exits);
> +	fprintf(stderr, "Guest records %lu HLTs executed, waked %lu times\n",
> +		data->hlt_count, data->wake_count);
> +
> +	kvm_vm_free(vm);
> +}
> +
> +/*
> + * Test case 3:
> + * Overriding the VM-scoped exits disable flags after a vCPU has been
> + * created is not allowed, so the KVM_ENABLE_CAP ioctl must fail.
> + */
> +static void test_vm_disable_exits_cap_with_vcpu_created(void)
> +{
> +	int r;
> +	struct kvm_vm *vm;
> +	struct kvm_enable_cap cap = {
> +		.cap = KVM_CAP_X86_DISABLE_EXITS,
> +		.args[0] = KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_OVERRIDE,
> +	};
> +
> +	/* Create VM */
> +	vm = vm_create(1);
> +	/* Add vCPU with loops halting */
> +	vm_vcpu_add(vm, 0, halter_guest_code);
> +
> +	/*
> +	 * With a vCPU already created, the VM-scoped ABI is expected to
> +	 * reject KVM_CAP_X86_DISABLE_EXITS and return non-zero. The raw
> +	 * __vm_ioctl() is used so that the return value can be checked
> +	 * here, which vm_enable_cap() does not allow.
> +	 */
> +	r = __vm_ioctl(vm, KVM_ENABLE_CAP, &cap);
> +
> +	TEST_ASSERT(r != 0,
> +		"Setting VM-scoped KVM_CAP_X86_DISABLE_EXITS after vCPUs created is not allowed, but it succeeds here\n");
> +
> +	/* Release the VM like the other test cases do */
> +	kvm_vm_free(vm);
> +}
> +
> +/*
> + * Test case 4:
> + * Toggle halt exits for a single vCPU after it has been created and verify
> + * that the overrides take effect. Phase one runs with HLT exits disabled on
> + * the halter vCPU, phase two runs after they have been re-enabled.
> + */
> +static void test_vcpu_toggling_disable_exits_cap(void)
> +{
> +	struct kvm_vcpu *halter_vcpu, *sender_vcpu;
> +	vm_vaddr_t guest_stats_page_vaddr;
> +	struct guest_stats *data;
> +	uint64_t kvm_halt_exits_in_guest;
> +	uint64_t kvm_halt_exits;
> +	struct kvm_vm *vm = vm_create(2);
> +
> +	/* Halter vCPU, with HLT exits disabled for this vCPU only */
> +	halter_vcpu = vm_vcpu_add(vm, 0, halter_guest_code);
> +	vcpu_enable_cap(halter_vcpu, KVM_CAP_X86_DISABLE_EXITS,
> +		KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_OVERRIDE);
> +
> +	vm_init_descriptor_tables(vm);
> +	vcpu_init_descriptor_tables(halter_vcpu);
> +	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
> +	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
> +
> +	/* Sender vCPU that wakes the halter up */
> +	sender_vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
> +
> +	/* One zeroed guest page carries the counters shared with the host */
> +	guest_stats_page_vaddr = vm_vaddr_alloc_page(vm);
> +	data = addr_gva2hva(vm, guest_stats_page_vaddr);
> +	memset(data, 0, sizeof(*data));
> +
> +	vcpu_args_set(halter_vcpu, 1, guest_stats_page_vaddr);
> +	vcpu_args_set(sender_vcpu, 1, guest_stats_page_vaddr);
> +
> +	/*
> +	 * Phase one: halt exits are disabled, so the halter vCPU executes
> +	 * HLT without exiting to the host. Remember how many HLTs the guest
> +	 * recorded during this phase.
> +	 */
> +	vm_run_with_threads(halter_vcpu, sender_vcpu);
> +	kvm_halt_exits_in_guest = data->hlt_count;
> +	fprintf(stderr, "Guest records %lu HLTs with halt exits disabled\n",
> +		data->hlt_count);
> +
> +	/*
> +	 * Override with an empty flag set to clear the disabled exits of the
> +	 * halter vCPU; halt exits are expected to occur from now on.
> +	 */
> +	vcpu_enable_cap(halter_vcpu, KVM_CAP_X86_DISABLE_EXITS,
> +		KVM_X86_DISABLE_EXITS_OVERRIDE);
> +
> +	/*
> +	 * Phase two: run both vCPUs again. Only the HLTs executed in this
> +	 * phase are expected to show up as halt exits.
> +	 */
> +	vm_run_with_threads(halter_vcpu, sender_vcpu);
> +
> +	kvm_halt_exits = read_vcpu_stats_halt_exits(halter_vcpu);
> +	TEST_ASSERT(kvm_halt_exits == data->hlt_count - kvm_halt_exits_in_guest,
> +		"Halter vCPU had unexpected %lu (should be %lu) halt exits\n",
> +		kvm_halt_exits, data->hlt_count - kvm_halt_exits_in_guest);
> +
> +	fprintf(stderr, "Halter vCPU had %lu halt exits\n", kvm_halt_exits);
> +	fprintf(stderr, "Guest records %lu HLTs executed, waked %lu times\n",
> +		data->hlt_count, data->wake_count);
> +
> +	kvm_vm_free(vm);
> +}
> +
> +/*
> + * Run the VM-scoped test cases (cap off, cap on, cap set after vCPU
> + * creation) followed by the vCPU-scoped toggling test. Command line
> + * arguments are not used.
> + */
> +int main(int argc, char *argv[])
> +{
> +	fputs("VM-scoped tests start\n", stderr);
> +	test_vm_disable_exits_cap(false);
> +	test_vm_disable_exits_cap(true);
> +	test_vm_disable_exits_cap_with_vcpu_created();
> +
> +	fputs("vCPU-scoped test starts\n", stderr);
> +	test_vcpu_toggling_disable_exits_cap();
> +
> +	return 0;
> +}