To help when debugging failures in the field, if instruction emulation
fails, report the VM exit reason, etc. to userspace in order that it
can be recorded.
The SGX changes here are compiled but untested.
Sean: if you want me to add your name to patch 3, given that I adopted
your sample code almost unaltered, please say.
v4:
- Update the API for preparing emulation failure report (Sean)
- sgx uses the provided API in all relevant cases (Sean)
- Clarify the intended layout of kvm_run.emulation_failure.
v3:
- Convey any debug data un-flagged after the ABI specified data in
struct emulation_failure (Sean)
- Obey the ABI protocol in sgx_handle_emulation_failure() (Sean)
v2:
- Improve patch comments (dmatlock)
- Intel should provide the full exit reason (dmatlock)
- Pass a boolean rather than flags (dmatlock)
- Use the helper in kvm_task_switch() and kvm_handle_memory_failure()
(dmatlock)
- Describe the exit_reason field of the emulation_failure structure
(dmatlock)
David Edmondson (4):
KVM: x86: Clarify the kvm_run.emulation_failure structure layout
KVM: x86: Get exit_reason as part of kvm_x86_ops.get_exit_info
KVM: x86: On emulation failure, convey the exit reason, etc. to
userspace
KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol
arch/x86/include/asm/kvm_host.h | 10 +++--
arch/x86/kvm/svm/svm.c | 8 ++--
arch/x86/kvm/trace.h | 9 ++--
arch/x86/kvm/vmx/nested.c | 2 +-
arch/x86/kvm/vmx/sgx.c | 16 +++-----
arch/x86/kvm/vmx/vmx.c | 11 +++--
arch/x86/kvm/x86.c | 73 ++++++++++++++++++++++++++-------
include/uapi/linux/kvm.h | 15 ++++++-
8 files changed, 100 insertions(+), 44 deletions(-)
--
2.30.2
Should instruction emulation fail, include the VM exit reason, etc. in
the emulation_failure data passed to userspace, in order that the VMM
can report it as a debugging aid when describing the failure.
Suggested-by: Joao Martins <[email protected]>
Signed-off-by: David Edmondson <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 3 ++
arch/x86/kvm/vmx/vmx.c | 5 +--
arch/x86/kvm/x86.c | 73 ++++++++++++++++++++++++++-------
include/uapi/linux/kvm.h | 7 ++++
4 files changed, 70 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e3c0788bcdc2..da2d8f3a2019 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1630,6 +1630,9 @@ extern u64 kvm_mce_cap_supported;
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
+void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
+ u64 *data, u8 ndata);
+void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6e5706ecce0b..9d14f68651f1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5367,10 +5367,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (vmx->emulation_required && !vmx->rmode.vm86_active &&
vcpu->arch.exception.pending) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror =
- KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5d5c5ed7dd4..35639391de7b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7465,29 +7465,78 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
}
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
-static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
+ u8 ndata, u8 *insn_bytes, u8 insn_size)
{
- struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
- u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
struct kvm_run *run = vcpu->run;
+ u8 ndata_start;
+ u64 info[5];
+
+ /*
+ * Zero the whole array used to retrieve the exit info, casting to u32
+ * for select entries will leave some chunks uninitialized.
+ */
+ memset(&info, 0, sizeof(info));
+
+ static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
+ &info[2], (u32 *)&info[3],
+ (u32 *)&info[4]);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
- run->emulation_failure.ndata = 0;
+
+ /*
+ * There's currently space for 13 entries, but 5 are used for the exit
+ * reason and info. Restrict to 4 to reduce the maintenance burden
+ * when expanding kvm_run.emulation_failure in the future.
+ */
+ if (WARN_ON_ONCE(ndata > 4))
+ ndata = 4;
+
+ /* Always include the flags as a 'data' entry. */
+ ndata_start = 1;
run->emulation_failure.flags = 0;
if (insn_size) {
- run->emulation_failure.ndata = 3;
+ ndata_start += (sizeof(run->emulation_failure.insn_size) +
+ sizeof(run->emulation_failure.insn_bytes)) /
+ sizeof(u64);
run->emulation_failure.flags |=
KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
run->emulation_failure.insn_size = insn_size;
memset(run->emulation_failure.insn_bytes, 0x90,
sizeof(run->emulation_failure.insn_bytes));
- memcpy(run->emulation_failure.insn_bytes,
- ctxt->fetch.data, insn_size);
+ memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
}
+
+ memcpy(&run->internal.data[ndata_start], info, sizeof(info));
+ memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data,
+ ndata * sizeof(u64));
+
+ run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata;
}
+static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
+{
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+ prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
+ ctxt->fetch.end - ctxt->fetch.data);
+}
+
+void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
+ u8 ndata)
+{
+ prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
+
+void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+ __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
+
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
struct kvm *kvm = vcpu->kvm;
@@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
if (kvm->arch.exit_on_emulation_error ||
(emulation_type & EMULTYPE_SKIP)) {
- prepare_emulation_failure_exit(vcpu);
+ prepare_emulation_ctxt_failure_exit(vcpu);
return 0;
}
kvm_queue_exception(vcpu, UD_VECTOR);
if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ prepare_emulation_ctxt_failure_exit(vcpu);
return 0;
}
@@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
* doesn't seem to be a real use-case behind such requests, just return
* KVM_EXIT_INTERNAL_ERROR for now.
*/
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6c79c1ce3703..e86cc2de7b5c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -397,6 +397,12 @@ struct kvm_run {
* "ndata" is correct, that new fields are enumerated in "flags",
* and that each flag enumerates fields that are 64-bit aligned
* and sized (so that ndata+internal.data[] is valid/accurate).
+ *
+ * Space beyond the defined fields may be used to
+ * store arbitrary debug information relating to the
+ * emulation failure. It is accounted for in "ndata"
+ * but otherwise unspecified and is not represented in
+ * "flags".
*/
struct {
__u32 suberror;
@@ -408,6 +414,7 @@ struct kvm_run {
__u8 insn_bytes[15];
};
};
+ /* Arbitrary debug data may follow. */
} emulation_failure;
/* KVM_EXIT_OSI */
struct {
--
2.30.2
When passing the failing address and size out to user space, SGX must
take care not to trample on the earlier fields of the emulation_failure
sub-union of struct kvm_run.
Signed-off-by: David Edmondson <[email protected]>
---
arch/x86/kvm/vmx/sgx.c | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 6693ebdc0770..35e7ec91ae86 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -53,11 +53,9 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
unsigned int size)
{
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 2;
- vcpu->run->internal.data[0] = addr;
- vcpu->run->internal.data[1] = size;
+ uint64_t data[2] = { addr, size };
+
+ __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}
static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
@@ -112,9 +110,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
* but the error code isn't (yet) plumbed through the ENCLS helpers.
*/
if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
@@ -155,9 +151,7 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
if (!sgx_12_0 || !sgx_12_1) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
--
2.30.2
Extend the get_exit_info static call to provide the reason for the VM
exit. Modify relevant trace points to use this rather than extracting
the reason in the caller.
Signed-off-by: David Edmondson <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 7 ++++---
arch/x86/kvm/svm/svm.c | 8 +++++---
arch/x86/kvm/trace.h | 9 +++++----
arch/x86/kvm/vmx/nested.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 6 ++++--
5 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 974cbfb1eefe..e3c0788bcdc2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1377,10 +1377,11 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
/*
- * Retrieve somewhat arbitrary exit information. Intended to be used
- * only from within tracepoints to avoid VMREADs when tracing is off.
+ * Retrieve somewhat arbitrary exit information. Intended to
+ * be used only from within tracepoints or error paths.
*/
- void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2,
u32 *exit_int_info, u32 *exit_int_info_err_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e8ccab50ebf6..0df2fe5faa69 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3305,11 +3305,13 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
return svm_exit_handlers[exit_code](vcpu);
}
-static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2,
u32 *intr_info, u32 *error_code)
{
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+ *reason = control->exit_code;
*info1 = control->exit_info_1;
*info2 = control->exit_info_2;
*intr_info = control->exit_int_info;
@@ -3326,7 +3328,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
- trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
+ trace_kvm_exit(vcpu, KVM_ISA_SVM);
/* SEV-ES guests must use the CR write traps to track CR registers. */
if (!sev_es_guest(vcpu->kvm)) {
@@ -3339,7 +3341,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (is_guest_mode(vcpu)) {
int vmexit;
- trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM);
+ trace_kvm_nested_vmexit(vcpu, KVM_ISA_SVM);
vmexit = nested_svm_exit_special(svm);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 03ebe368333e..953b0fcb21ee 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -288,8 +288,8 @@ TRACE_EVENT(kvm_apic,
#define TRACE_EVENT_KVM_EXIT(name) \
TRACE_EVENT(name, \
- TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), \
- TP_ARGS(exit_reason, vcpu, isa), \
+ TP_PROTO(struct kvm_vcpu *vcpu, u32 isa), \
+ TP_ARGS(vcpu, isa), \
\
TP_STRUCT__entry( \
__field( unsigned int, exit_reason ) \
@@ -303,11 +303,12 @@ TRACE_EVENT(name, \
), \
\
TP_fast_assign( \
- __entry->exit_reason = exit_reason; \
__entry->guest_rip = kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
- static_call(kvm_x86_get_exit_info)(vcpu, &__entry->info1, \
+ static_call(kvm_x86_get_exit_info)(vcpu, \
+ &__entry->exit_reason, \
+ &__entry->info1, \
&__entry->info2, \
&__entry->intr_info, \
&__entry->error_code); \
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1a52134b0c42..fbbc01e9570b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6025,7 +6025,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
goto reflect_vmexit;
}
- trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX);
+ trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX);
/* If L0 (KVM) wants the exit, it trumps L1's desires. */
if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 927a552393b9..6e5706ecce0b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5617,11 +5617,13 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
static const int kvm_vmx_max_exit_handlers =
ARRAY_SIZE(kvm_vmx_exit_handlers);
-static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2,
u32 *intr_info, u32 *error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ *reason = vmx->exit_reason.full;
*info1 = vmx_get_exit_qual(vcpu);
if (!(vmx->exit_reason.failed_vmentry)) {
*info2 = vmx->idt_vectoring_info;
@@ -6748,7 +6750,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (likely(!vmx->exit_reason.failed_vmentry))
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
- trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX);
+ trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(vmx->exit_reason.failed_vmentry))
return EXIT_FASTPATH_NONE;
--
2.30.2
On Fri, Aug 13, 2021, David Edmondson wrote:
> -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
> +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
> + u8 ndata, u8 *insn_bytes, u8 insn_size)
> {
> - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
> struct kvm_run *run = vcpu->run;
> + u8 ndata_start;
> + u64 info[5];
> +
> + /*
> + * Zero the whole array used to retrieve the exit info, casting to u32
> + * for select entries will leave some chunks uninitialized.
> + */
> + memset(&info, 0, sizeof(info));
> +
> + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
> + &info[2], (u32 *)&info[3],
> + (u32 *)&info[4]);
>
> run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
> - run->emulation_failure.ndata = 0;
> +
> + /*
> + * There's currently space for 13 entries, but 5 are used for the exit
> + * reason and info. Restrict to 4 to reduce the maintenance burden
> + * when expanding kvm_run.emulation_failure in the future.
> + */
> + if (WARN_ON_ONCE(ndata > 4))
> + ndata = 4;
> +
> + /* Always include the flags as a 'data' entry. */
> + ndata_start = 1;
> run->emulation_failure.flags = 0;
>
> if (insn_size) {
> - run->emulation_failure.ndata = 3;
> + ndata_start += (sizeof(run->emulation_failure.insn_size) +
> + sizeof(run->emulation_failure.insn_bytes)) /
> + sizeof(u64);
Hrm, I like the intent, but the end result ends up being rather convoluted and
unnecessarily scary, e.g. this would do the wrong thing if the combined size of
the fields is not a multiple of 8. That's obviously not the case today, but relying on
insn_size/insn_bytes being carefully selected while simultaneously obscuring that
dependency is a bit mean. What about a compile-time assertion with a more reader
friendly literal for bumping the count?
BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) +
sizeof(run->emulation_failure.insn_bytes) != 16));
ndata_start += 2;
> run->emulation_failure.flags |=
> KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
> run->emulation_failure.insn_size = insn_size;
> memset(run->emulation_failure.insn_bytes, 0x90,
> sizeof(run->emulation_failure.insn_bytes));
> - memcpy(run->emulation_failure.insn_bytes,
> - ctxt->fetch.data, insn_size);
> + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
> }
> +
> + memcpy(&run->internal.data[ndata_start], info, sizeof(info));
Oof, coming back to this code after some time away, "ndata_start" is confusing.
I believe past me thought that it would help convey that "info" is lumped into
the arbitrary data, but for me at least it just ends up making the interaction
with @data and @ndata more confusing. Sorry for the bad suggestion :-/
What about info_start? IMO, that makes the memcpy more readable. Another option
would be to have the name describe the number of "ABI entries", but I can't come
up with a variable name that's remotely readable.
memcpy(&run->internal.data[info_start], info, sizeof(info));
memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data,
ndata * sizeof(data[0]));
> + memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data,
> + ndata * sizeof(u64));
Not that it really matters, but it's probably better to use sizeof(data[0]) or
sizeof(*data). E.g. if we do screw up the param in the future, we only botch the
output formatting, as opposed to dumping kernel stack data to userspace.
> +
> + run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata;
> }
>
> +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
> +{
> + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> +
> + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
> + ctxt->fetch.end - ctxt->fetch.data);
> +}
> +
> +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
> + u8 ndata)
> +{
> + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
> +}
> +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
> +
> +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
> +{
> + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
> +}
> +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
> +
> static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
> {
> struct kvm *kvm = vcpu->kvm;
> @@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>
> if (kvm->arch.exit_on_emulation_error ||
> (emulation_type & EMULTYPE_SKIP)) {
> - prepare_emulation_failure_exit(vcpu);
> + prepare_emulation_ctxt_failure_exit(vcpu);
> return 0;
> }
>
> kvm_queue_exception(vcpu, UD_VECTOR);
>
> if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> - vcpu->run->internal.ndata = 0;
> + prepare_emulation_ctxt_failure_exit(vcpu);
> return 0;
> }
>
> @@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
> * doesn't seem to be a real use-case behind such requests, just return
> * KVM_EXIT_INTERNAL_ERROR for now.
> */
> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> - vcpu->run->internal.ndata = 0;
> + kvm_prepare_emulation_failure_exit(vcpu);
>
> return 0;
> }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6c79c1ce3703..e86cc2de7b5c 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -397,6 +397,12 @@ struct kvm_run {
> * "ndata" is correct, that new fields are enumerated in "flags",
> * and that each flag enumerates fields that are 64-bit aligned
> * and sized (so that ndata+internal.data[] is valid/accurate).
> + *
> + * Space beyond the defined fields may be used to
Please run these out to 80 chars. Even 80 is a soft limit, it's ok to run over
a bit if the end result is (subjectively) prettier.
> + * store arbitrary debug information relating to the
> + * emulation failure. It is accounted for in "ndata"
> + * but otherwise unspecified and is not represented in
Explicitly state the format is unspecified?
> + * "flags".
And also explicitly state that the debug info isn't ABI, e.g.
* Space beyond the defined fields may be used to store arbitrary
* debug information relating to the emulation failure. It is
* accounted for in "ndata" but the format is unspecified and
* is not represented in "flags". Any such info is _not_ ABI!
> */
> struct {
> __u32 suberror;
> @@ -408,6 +414,7 @@ struct kvm_run {
> __u8 insn_bytes[15];
> };
> };
> + /* Arbitrary debug data may follow. */
> } emulation_failure;
> /* KVM_EXIT_OSI */
> struct {
> --
> 2.30.2
>
On Fri, Aug 13, 2021, David Edmondson wrote:
> When passing the failing address and size out to user space, SGX must
> ensure not to trample on the earlier fields of the emulation_failure
> sub-union of struct kvm_run.
>
> Signed-off-by: David Edmondson <[email protected]>
> ---
Reviewed-by: Sean Christopherson <[email protected]>
Sean Christopherson <[email protected]> writes:
> On Fri, Aug 13, 2021, David Edmondson wrote:
>> -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>> +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
>> + u8 ndata, u8 *insn_bytes, u8 insn_size)
>> {
>> - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
>> - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
>> struct kvm_run *run = vcpu->run;
>> + u8 ndata_start;
>> + u64 info[5];
>> +
>> + /*
>> + * Zero the whole array used to retrieve the exit info, casting to u32
>> + * for select entries will leave some chunks uninitialized.
>> + */
>> + memset(&info, 0, sizeof(info));
>> +
>> + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
>> + &info[2], (u32 *)&info[3],
>> + (u32 *)&info[4]);
>>
>> run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> - run->emulation_failure.ndata = 0;
>> +
>> + /*
>> + * There's currently space for 13 entries, but 5 are used for the exit
>> + * reason and info. Restrict to 4 to reduce the maintenance burden
>> + * when expanding kvm_run.emulation_failure in the future.
>> + */
>> + if (WARN_ON_ONCE(ndata > 4))
>> + ndata = 4;
>> +
>> + /* Always include the flags as a 'data' entry. */
>> + ndata_start = 1;
>> run->emulation_failure.flags = 0;
>>
>> if (insn_size) {
>> - run->emulation_failure.ndata = 3;
>> + ndata_start += (sizeof(run->emulation_failure.insn_size) +
>> + sizeof(run->emulation_failure.insn_bytes)) /
>> + sizeof(u64);
>
> Hrm, I like the intent, but the end result ends up being rather convoluted and
> unnecessarily scary, e.g. this would do the wrong thing if the combined size of
> the fields is not a multiple of 8. That's obviously is not true, but relying on
> insn_size/insn_bytes being carefully selected while simultaneously obscuring that
> dependency is a bit mean. What about a compile-time assertion with a more reader
> friendly literal for bumping the count?
>
> BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) +
> sizeof(run->emulation_failure.insn_bytes) != 16));
> ndata_start += 2;
Okay.
>> run->emulation_failure.flags |=
>> KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
>> run->emulation_failure.insn_size = insn_size;
>> memset(run->emulation_failure.insn_bytes, 0x90,
>> sizeof(run->emulation_failure.insn_bytes));
>> - memcpy(run->emulation_failure.insn_bytes,
>> - ctxt->fetch.data, insn_size);
>> + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
>> }
>> +
>> + memcpy(&run->internal.data[ndata_start], info, sizeof(info));
>
> Oof, coming back to this code after some time away, "ndata_start" is confusing.
> I believe past me thought that it would help convey that "info" is lumped into
> the arbitrary data, but for me at least it just ends up making the interaction
> with @data and @ndata more confusing. Sorry for the bad suggestion :-/
>
> What about info_start? IMO, that makes the memcpy more readable. Another option
> would be to have the name describe the number of "ABI enries", but I can't come
> up with a variable name that's remotely readable.
>
> memcpy(&run->internal.data[info_start], info, sizeof(info));
> memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data,
> ndata * sizeof(data[0]));
Okay.
>> + memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data,
>> + ndata * sizeof(u64));
>
> Not that it really matters, but it's probably better to use sizeof(data[0]) or
> sizeof(*data). E.g. if we do screw up the param in the future, we only botch the
> output formatting, as opposed to dumping kernel stack data to userspace.
Agreed.
>> +
>> + run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata;
>> }
>>
>> +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
>> +{
>> + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
>> +
>> + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
>> + ctxt->fetch.end - ctxt->fetch.data);
>> +}
>> +
>> +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
>> + u8 ndata)
>> +{
>> + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
>> +}
>> +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
>> +
>> +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>> +{
>> + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
>> +
>> static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>> {
>> struct kvm *kvm = vcpu->kvm;
>> @@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>>
>> if (kvm->arch.exit_on_emulation_error ||
>> (emulation_type & EMULTYPE_SKIP)) {
>> - prepare_emulation_failure_exit(vcpu);
>> + prepare_emulation_ctxt_failure_exit(vcpu);
>> return 0;
>> }
>>
>> kvm_queue_exception(vcpu, UD_VECTOR);
>>
>> if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
>> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> - vcpu->run->internal.ndata = 0;
>> + prepare_emulation_ctxt_failure_exit(vcpu);
>> return 0;
>> }
>>
>> @@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
>> * doesn't seem to be a real use-case behind such requests, just return
>> * KVM_EXIT_INTERNAL_ERROR for now.
>> */
>> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> - vcpu->run->internal.ndata = 0;
>> + kvm_prepare_emulation_failure_exit(vcpu);
>>
>> return 0;
>> }
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 6c79c1ce3703..e86cc2de7b5c 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -397,6 +397,12 @@ struct kvm_run {
>> * "ndata" is correct, that new fields are enumerated in "flags",
>> * and that each flag enumerates fields that are 64-bit aligned
>> * and sized (so that ndata+internal.data[] is valid/accurate).
>> + *
>> + * Space beyond the defined fields may be used to
>
> Please run these out to 80 chars. Even 80 is a soft limit, it's ok to run over
> a bit if the end result is (subjectively) prettier.
>
>> + * store arbitrary debug information relating to the
>> + * emulation failure. It is accounted for in "ndata"
>> + * but otherwise unspecified and is not represented in
>
> Explicitly state the format is unspecified?
>
>> + * "flags".
>
> And also explicitly stating the debug info isn't ABI, e.g.
>
> * Space beyond the defined fields may be used to store arbitrary
> * debug information relating to the emulation failure. It is
> * accounted for in "ndata" but the format is unspecified and
> * is not represented in "flags". Any such info is _not_ ABI!
Okay.
>> */
>> struct {
>> __u32 suberror;
>> @@ -408,6 +414,7 @@ struct kvm_run {
>> __u8 insn_bytes[15];
>> };
>> };
>> + /* Arbitrary debug data may follow. */
>> } emulation_failure;
>> /* KVM_EXIT_OSI */
>> struct {
>> --
>> 2.30.2
>>