2022-09-29 17:24:56

by Paolo Bonzini

Subject: [PATCH v2 0/8] KVM: x86: allow compiling out SMM support

Some users of KVM implement the UEFI variable store through a paravirtual device
that does not require the "SMM lockbox" component of edk2; allow them to
compile out system management mode, which in any case is not a complete
implementation, especially in how it interacts with nested virtualization.

In order to limit the number, and especially the size, of the #ifdefs,
the first 4 patches move most SMM code to a completely new file in
arch/x86/kvm. Patch 5 is the main change: it introduces the Kconfig
symbol and keys smm.c's compilation off it. Patches 6-8 then eliminate
the bits of SMM code that remain outside smm.c, the last one being
"optional" as it is well into diminishing-returns territory.

Paolo

Paolo Bonzini (8):
KVM: x86: start moving SMM-related functions to new files
KVM: x86: move SMM entry to a new file
KVM: x86: move SMM exit to a new file
KVM: x86: do not go through ctxt->ops when emulating rsm
KVM: allow compiling out SMM support
KVM: x86: compile out vendor-specific code if SMM is disabled
KVM: x86: remove SMRAM address space if SMM is not supported
KVM: x86: do not define KVM_REQ_SMI if SMM disabled

arch/x86/include/asm/kvm-x86-ops.h | 2 +
arch/x86/include/asm/kvm_host.h | 24 +-
arch/x86/kvm/Kconfig | 11 +
arch/x86/kvm/Makefile | 1 +
arch/x86/kvm/emulate.c | 355 +----------
arch/x86/kvm/kvm_cache_regs.h | 5 -
arch/x86/kvm/kvm_emulate.h | 47 +-
arch/x86/kvm/lapic.c | 14 +-
arch/x86/kvm/lapic.h | 7 +-
arch/x86/kvm/mmu/mmu.c | 1 +
arch/x86/kvm/smm.c | 573 ++++++++++++++++++
arch/x86/kvm/smm.h | 38 ++
arch/x86/kvm/svm/nested.c | 3 +
arch/x86/kvm/svm/svm.c | 11 +-
arch/x86/kvm/vmx/nested.c | 1 +
arch/x86/kvm/vmx/vmx.c | 7 +
arch/x86/kvm/x86.c | 353 +----------
tools/testing/selftests/kvm/x86_64/smm_test.c | 2 +
18 files changed, 737 insertions(+), 718 deletions(-)
create mode 100644 arch/x86/kvm/smm.c
create mode 100644 arch/x86/kvm/smm.h

--
2.31.1


2022-09-29 17:38:14

by Paolo Bonzini

Subject: [PATCH v2 4/8] KVM: x86: do not go through ctxt->ops when emulating rsm

Now that RSM is implemented in a single emulator callback, there is no
point in going through other callbacks for the sake of modifying
processor state. Just invoke KVM's own internal functions directly,
and remove the callbacks that were only used by em_rsm; the only
substantial difference is in the handling of the segment registers
and descriptor cache, which have to be parsed into a struct kvm_segment
instead of a struct desc_struct.

This also fixes a bug where emulator_set_segment would shift the
limit left by 12 if the G bit was set, even though the limit had not
been shifted right upon entry to SMM.

The emulator context is still used to restore EIP and the general-purpose
registers.
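
As an aside, a rough sketch of the limit-scaling problem (illustrative only,
not the actual KVM code; GET_SMSTATE and the save-area offset are the ones
used in the patch):

	u32 saved_limit = GET_SMSTATE(u32, smstate, offset + 4); /* already byte-granular */
	u32 wrong_limit = (saved_limit << 12) | 0xfff;           /* old path: rescaled again when G=1 */

	desc.limit = saved_limit;                                 /* new path: used as-is */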

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/kvm_emulate.h | 13 ---
arch/x86/kvm/smm.c | 177 +++++++++++++++++--------------------
arch/x86/kvm/x86.c | 33 -------
3 files changed, 81 insertions(+), 142 deletions(-)

diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index d7afbc448dd2..84b1f2661463 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -116,16 +116,6 @@ struct x86_emulate_ops {
unsigned int bytes,
struct x86_exception *fault, bool system);

- /*
- * read_phys: Read bytes of standard (non-emulated/special) memory.
- * Used for descriptor reading.
- * @addr: [IN ] Physical address from which to read.
- * @val: [OUT] Value read from memory.
- * @bytes: [IN ] Number of bytes to read from memory.
- */
- int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
- void *val, unsigned int bytes);
-
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for descriptor writing.
@@ -209,11 +199,8 @@ struct x86_emulate_ops {
int (*cpl)(struct x86_emulate_ctxt *ctxt);
void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
- u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
- void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
- int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 773e07b6397d..41ca128478fc 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -271,71 +271,59 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}

-static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
-{
-#ifdef CONFIG_X86_64
- return ctxt->ops->guest_has_long_mode(ctxt);
-#else
- return false;
-#endif
-}
-
-static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
- desc->d = (flags >> 22) & 1;
+ desc->db = (flags >> 22) & 1;
desc->l = (flags >> 21) & 1;
desc->avl = (flags >> 20) & 1;
- desc->p = (flags >> 15) & 1;
+ desc->present = (flags >> 15) & 1;
desc->dpl = (flags >> 13) & 3;
desc->s = (flags >> 12) & 1;
desc->type = (flags >> 8) & 15;
+
+ desc->unusable = !desc->present;
+ desc->padding = 0;
}

-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
int n)
{
- struct desc_struct desc;
+ struct kvm_segment desc;
int offset;
- u16 selector;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);

if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;

- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+ desc.base = GET_SMSTATE(u32, smstate, offset + 8);
+ desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+ kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
int n)
{
- struct desc_struct desc;
+ struct kvm_segment desc;
int offset;
- u16 selector;
- u32 base3;

offset = 0x7e00 + n * 16;

- selector = GET_SMSTATE(u16, smstate, offset);
+ desc.selector = GET_SMSTATE(u16, smstate, offset);
rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- base3 = GET_SMSTATE(u32, smstate, offset + 12);
-
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+ desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
+ desc.base = GET_SMSTATE(u64, smstate, offset + 8);
+ kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}
#endif

-static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
u64 cr0, u64 cr3, u64 cr4)
{
int bad;
@@ -348,7 +336,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
cr3 &= ~0xfff;
}

- bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ bad = kvm_set_cr3(vcpu, cr3);
if (bad)
return X86EMUL_UNHANDLEABLE;

@@ -357,20 +345,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
* Then enable protected mode. However, PCID cannot be enabled
* if EFER.LMA=0, so set it separately.
*/
- bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
if (bad)
return X86EMUL_UNHANDLEABLE;

- bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+ bad = kvm_set_cr0(vcpu, cr0);
if (bad)
return X86EMUL_UNHANDLEABLE;

if (cr4 & X86_CR4_PCIDE) {
- bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ bad = kvm_set_cr4(vcpu, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
if (pcid) {
- bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ bad = kvm_set_cr3(vcpu, cr3 | pcid);
if (bad)
return X86EMUL_UNHANDLEABLE;
}
@@ -383,9 +371,9 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
- struct desc_struct desc;
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_segment desc;
struct desc_ptr dt;
- u16 selector;
u32 val, cr0, cr3, cr4;
int i;

@@ -399,56 +387,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,

val = GET_SMSTATE(u32, smstate, 0x7fcc);

- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (kvm_set_dr(vcpu, 6, val))
return X86EMUL_UNHANDLEABLE;

val = GET_SMSTATE(u32, smstate, 0x7fc8);

- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (kvm_set_dr(vcpu, 7, val))
return X86EMUL_UNHANDLEABLE;

- selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ desc.base = GET_SMSTATE(u32, smstate, 0x7f64);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7f60);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);

- selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ desc.base = GET_SMSTATE(u32, smstate, 0x7f80);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);

dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
- ctxt->ops->set_gdt(ctxt, &dt);
+ static_call(kvm_x86_set_gdt)(vcpu, &dt);

dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
- ctxt->ops->set_idt(ctxt, &dt);
+ static_call(kvm_x86_set_idt)(vcpu, &dt);

for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smstate, i);
+ int r = rsm_load_seg_32(vcpu, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}

cr4 = GET_SMSTATE(u32, smstate, 0x7f14);

- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+ vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);

- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
- struct desc_struct desc;
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_segment desc;
struct desc_ptr dt;
u64 val, cr0, cr3, cr4;
- u32 base3;
- u16 selector;
int i, r;

for (i = 0; i < NR_EMULATOR_GPRS; i++)
@@ -459,51 +446,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,

val = GET_SMSTATE(u64, smstate, 0x7f68);

- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (kvm_set_dr(vcpu, 6, val))
return X86EMUL_UNHANDLEABLE;

val = GET_SMSTATE(u64, smstate, 0x7f60);

- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (kvm_set_dr(vcpu, 7, val))
return X86EMUL_UNHANDLEABLE;

cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00);
val = GET_SMSTATE(u64, smstate, 0x7ed0);

- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;

- selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7e90);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
- base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7e94);
+ desc.base = GET_SMSTATE(u64, smstate, 0x7e98);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);

dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
- ctxt->ops->set_idt(ctxt, &dt);
+ static_call(kvm_x86_set_idt)(vcpu, &dt);

- selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7e70);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
- base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7e74);
+ desc.base = GET_SMSTATE(u64, smstate, 0x7e78);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);

dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
- ctxt->ops->set_gdt(ctxt, &dt);
+ static_call(kvm_x86_set_gdt)(vcpu, &dt);

- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;

for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smstate, i);
+ r = rsm_load_seg_64(vcpu, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
@@ -520,14 +505,14 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
u64 smbase;
int ret;

- smbase = ctxt->ops->get_smbase(ctxt);
+ smbase = vcpu->arch.smbase;

- ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
- if (ret != X86EMUL_CONTINUE)
+ ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret < 0)
return X86EMUL_UNHANDLEABLE;

- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
+ if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
+ static_call(kvm_x86_set_nmi_mask)(vcpu, false);

kvm_smm_changed(vcpu, false);

@@ -535,41 +520,41 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
- *
- * The ctxt->ops callbacks will handle all side effects when writing
- * writing MSRs and CRs, e.g. MMU context resets, CPUID
- * runtime updates, etc.
*/
- if (emulator_has_longmode(ctxt)) {
- struct desc_struct cs_desc;
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ struct kvm_segment cs_desc;

/* Zero CR4.PCIDE before CR0.PG. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
+ cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PCIDE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
cs_desc.type = 0xb;
- cs_desc.s = cs_desc.g = cs_desc.p = 1;
- ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+ cs_desc.s = cs_desc.g = cs_desc.present = 1;
+ kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
}
+#endif

/* For the 64-bit case, this will clear EFER.LMA. */
- cr0 = ctxt->ops->get_cr(ctxt, 0);
+ cr0 = kvm_read_cr0(vcpu);
if (cr0 & X86_CR0_PE)
- ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+ kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

- if (emulator_has_longmode(ctxt)) {
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
/* Clear CR4.PAE before clearing EFER.LME. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
+ cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+ kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

/* And finally go back to 32-bit mode. */
efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ kvm_set_msr(vcpu, MSR_EFER, efer);
}
+#endif

/*
* Give leave_smm() a chance to make ISA-specific changes to the vCPU
@@ -580,7 +565,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
- if (emulator_has_longmode(ctxt))
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
return rsm_load_state_64(ctxt, buf);
else
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 97d6ee179109..97a871635986 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7201,15 +7201,6 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
}

-static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val, unsigned int bytes)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
-
- return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
-}
-
static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u64 access,
struct x86_exception *exception)
@@ -8001,26 +7992,6 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
}

-static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
- u32 msr_index, u64 data)
-{
- return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
-}
-
-static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
- return vcpu->arch.smbase;
-}
-
-static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
- vcpu->arch.smbase = smbase;
-}
-
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
@@ -8119,7 +8090,6 @@ static const struct x86_emulate_ops emulate_ops = {
.write_gpr = emulator_write_gpr,
.read_std = emulator_read_std,
.write_std = emulator_write_std,
- .read_phys = kvm_read_guest_phys_system,
.fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
@@ -8139,11 +8109,8 @@ static const struct x86_emulate_ops emulate_ops = {
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
- .get_smbase = emulator_get_smbase,
- .set_smbase = emulator_set_smbase,
.set_msr_with_filter = emulator_set_msr_with_filter,
.get_msr_with_filter = emulator_get_msr_with_filter,
- .set_msr = emulator_set_msr,
.get_msr = emulator_get_msr,
.check_pmc = emulator_check_pmc,
.read_pmc = emulator_read_pmc,
--
2.31.1


2022-09-29 17:38:23

by Paolo Bonzini

Subject: [PATCH v2 5/8] KVM: allow compiling out SMM support

Some users of KVM implement the UEFI variable store through a paravirtual device
that does not require the "SMM lockbox" component of edk2; allow them to
compile out system management mode, which in any case is not a complete
implementation, especially in how it interacts with nested virtualization.
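
For completeness, a hypothetical .config fragment for a build that takes
advantage of this (not part of the patch; since the symbol defaults to y,
it has to be switched off explicitly):

	CONFIG_KVM=m
	CONFIG_KVM_INTEL=m
	# CONFIG_KVM_SMM is not set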

Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/Kconfig | 11 ++++++++++
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/smm.h | 13 ++++++++++++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/vmx/vmx.c | 2 ++
arch/x86/kvm/x86.c | 21 +++++++++++++++++--
tools/testing/selftests/kvm/x86_64/smm_test.c | 2 ++
7 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a107df22ffee..1679f9b4e96d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -88,6 +88,17 @@ config KVM_INTEL
To compile this as a module, choose M here: the module
will be called kvm-intel.

+config KVM_SMM
+ bool "System Management Mode emulation"
+ default y
+ depends on KVM
+ help
+ Provides support for KVM to emulate System Management Mode (SMM)
+ in virtual machines. This can be used by the virtual machine
+ firmware to implement UEFI secure boot.
+
+ If unsure, say Y.
+
config X86_SGX_KVM
bool "Software Guard eXtensions (SGX) Virtualization"
depends on X86_SGX && KVM_INTEL
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ec6f7656254b..6cf40f668277 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,7 +20,7 @@ endif

kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o
-kvm-y += smm.o
+kvm-$(CONFIG_KVM_SMM) += smm.o

kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index b0602a92e511..4c699fee4492 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -8,6 +8,7 @@
#define PUT_SMSTATE(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val

+#ifdef CONFIG_KVM_SMM
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
{
kvm_make_request(KVM_REQ_SMI, vcpu);
@@ -23,5 +24,17 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
void enter_smm(struct kvm_vcpu *vcpu);
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
void process_smi(struct kvm_vcpu *vcpu);
+#else
+static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
+static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
+static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
+static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
+static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
+
+/*
+ * emulator_leave_smm is used as a function pointer, so the
+ * stub is defined in x86.c.
+ */
+#endif

#endif
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 496ee7d1ae2f..6f7ceb35d2ff 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4150,6 +4150,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
case MSR_IA32_SMBASE:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ return false;
/* SEV-ES guests do not support SMM, so report false */
if (kvm && sev_es_guest(kvm))
return false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 038809c68006..b22330a15adb 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6841,6 +6841,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_SMBASE:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ return false;
/*
* We cannot do SMM unless we can run the guest in big
* real mode.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 97a871635986..a9e050aefea6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3636,7 +3636,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
}
case MSR_IA32_SMBASE:
- if (!msr_info->host_initiated)
+ if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
return 1;
vcpu->arch.smbase = data;
break;
@@ -4052,7 +4052,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.ia32_misc_enable_msr;
break;
case MSR_IA32_SMBASE:
- if (!msr_info->host_initiated)
+ if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
return 1;
msr_info->data = vcpu->arch.smbase;
break;
@@ -4426,6 +4426,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
case KVM_CAP_X86_SMM:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ break;
+
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
* so do not report SMM to be available if real mode is
@@ -5176,6 +5179,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;

if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+ if (!IS_ENABLED(CONFIG_KVM_SMM) &&
+ (events->smi.smm ||
+ events->smi.pending ||
+ events->smi.smm_inside_nmi))
+ return -EINVAL;
+
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
@@ -8066,6 +8075,14 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
return emul_to_vcpu(ctxt)->arch.hflags;
}

+#ifndef CONFIG_KVM_SMM
+static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+ WARN_ON_ONCE(1);
+ return X86EMUL_UNHANDLEABLE;
+}
+#endif
+
static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
{
kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 1f136a81858e..cb38a478e1f6 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -137,6 +137,8 @@ int main(int argc, char *argv[])
struct kvm_x86_state *state;
int stage, stage_reported;

+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);

--
2.31.1


2022-09-29 17:39:56

by Paolo Bonzini

Subject: [PATCH v2 8/8] KVM: x86: do not define KVM_REQ_SMI if SMM disabled

This ensures that all the relevant code is compiled out; in fact,
the process_smi stub can be removed too.
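
Purely as an illustration (a hypothetical leftover user, not code in the
tree): with CONFIG_KVM_SMM=n the request bit no longer exists, so something
like

	kvm_make_request(KVM_REQ_SMI, vcpu);

now fails at compile time instead of remaining silently reachable.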

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/smm.h | 1 -
arch/x86/kvm/x86.c | 6 ++++++
3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d11697504471..d58d4a62b227 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,7 +81,9 @@
#define KVM_REQ_NMI KVM_ARCH_REQ(9)
#define KVM_REQ_PMU KVM_ARCH_REQ(10)
#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#endif
#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 7ccce6b655ca..a6795b93ba30 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
-static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }

/*
* emulator_leave_smm is used as a function pointer, so the
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e22184bad92b..ba5661ee3fd7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5020,8 +5020,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,

process_nmi(vcpu);

+#ifdef CONFIG_KVM_SMM
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
+#endif

/*
* KVM's ABI only allows for one exception to be migrated. Luckily,
@@ -10194,8 +10196,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
record_steal_time(vcpu);
+#ifdef CONFIG_KVM_SMM
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
+#endif
if (kvm_check_request(KVM_REQ_NMI, vcpu))
process_nmi(vcpu);
if (kvm_check_request(KVM_REQ_PMU, vcpu))
@@ -12539,7 +12543,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
return true;

if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+#ifdef CONFIG_KVM_SMM
kvm_test_request(KVM_REQ_SMI, vcpu) ||
+#endif
kvm_test_request(KVM_REQ_EVENT, vcpu))
return true;

--
2.31.1

2022-09-29 17:42:04

by Paolo Bonzini

Subject: [PATCH v2 6/8] KVM: x86: compile out vendor-specific code if SMM is disabled

Vendor-specific code that deals with SMI injection and saving/restoring
SMM state is not needed if CONFIG_KVM_SMM is disabled, so remove the
four callbacks smi_allowed, enter_smm, leave_smm and enable_smi_window.
The users in svm/nested.c and x86.c also have to be compiled out; the
amount of #ifdef'ed code is small and it's not worth moving it to
smm.c.

enter_smm is now used only within #ifdef CONFIG_KVM_SMM, and the stub
can therefore be removed.

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 ++
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/smm.h | 1 -
arch/x86/kvm/svm/nested.c | 2 ++
arch/x86/kvm/svm/svm.c | 4 ++++
arch/x86/kvm/vmx/vmx.c | 4 ++++
arch/x86/kvm/x86.c | 4 ++++
7 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 82ba4a564e58..ea58e67e9a67 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
KVM_X86_OP_OPTIONAL(set_hv_timer)
KVM_X86_OP_OPTIONAL(cancel_hv_timer)
KVM_X86_OP(setup_mce)
+#ifdef CONFIG_KVM_SMM
KVM_X86_OP(smi_allowed)
KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
+#endif
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b7d078cd768d..cb88da02d965 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1606,10 +1606,12 @@ struct kvm_x86_ops {

void (*setup_mce)(struct kvm_vcpu *vcpu);

+#ifdef CONFIG_KVM_SMM
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
+#endif

int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 4c699fee4492..7ccce6b655ca 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
-static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }

/*
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cc0fd75f7cba..b258d6988f5d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1378,6 +1378,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}

+#ifdef CONFIG_KVM_SMM
if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
@@ -1386,6 +1387,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
return 0;
}
+#endif

if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
if (block_nested_events)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6f7ceb35d2ff..2200b8aa7273 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4408,6 +4408,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
}

+#ifdef CONFIG_KVM_SMM
bool svm_smi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4557,6 +4558,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
/* We must be in SMM; RSM will cause a vmexit anyway. */
}
}
+#endif

static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
@@ -4832,10 +4834,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.pi_update_irte = avic_pi_update_irte,
.setup_mce = svm_setup_mce,

+#ifdef CONFIG_KVM_SMM
.smi_allowed = svm_smi_allowed,
.enter_smm = svm_enter_smm,
.leave_smm = svm_leave_smm,
.enable_smi_window = svm_enable_smi_window,
+#endif

.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b22330a15adb..107fc035c91b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7905,6 +7905,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEAT_CTL_LMCE_ENABLED;
}

+#ifdef CONFIG_KVM_SMM
static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
/* we need a nested vmexit to enter SMM, postpone if run is pending */
@@ -7959,6 +7960,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
{
/* RSM will cause a vmexit anyway. */
}
+#endif

static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
@@ -8126,10 +8128,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {

.setup_mce = vmx_setup_mce,

+#ifdef CONFIG_KVM_SMM
.smi_allowed = vmx_smi_allowed,
.enter_smm = vmx_enter_smm,
.leave_smm = vmx_leave_smm,
.enable_smi_window = vmx_enable_smi_window,
+#endif

.can_emulate_instruction = vmx_can_emulate_instruction,
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a9e050aefea6..e22184bad92b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9863,6 +9863,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
* in order to make progress and get back here for another iteration.
* The kvm_x86_ops hooks communicate this by returning -EBUSY.
*/
+#ifdef CONFIG_KVM_SMM
if (vcpu->arch.smi_pending) {
r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
if (r < 0)
@@ -9875,6 +9876,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
} else
static_call(kvm_x86_enable_smi_window)(vcpu);
}
+#endif

if (vcpu->arch.nmi_pending) {
r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
@@ -12491,10 +12493,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
static_call(kvm_x86_nmi_allowed)(vcpu, false)))
return true;

+#ifdef CONFIG_KVM_SMM
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
(vcpu->arch.smi_pending &&
static_call(kvm_x86_smi_allowed)(vcpu, false)))
return true;
+#endif

if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
--
2.31.1


2022-09-29 17:42:08

by Paolo Bonzini

Subject: [PATCH v2 2/8] KVM: x86: move SMM entry to a new file

Some users of KVM implement the UEFI variable store through a paravirtual
device that does not require the "SMM lockbox" component of edk2, and
would like to compile out system management mode. In preparation for
that, move the SMM entry code out of x86.c and into a new file.
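
For orientation, a small worked example of how the documented save-area
offsets used below map onto guest memory (the arithmetic follows from the
PUT_SMSTATE definition in smm.h and the kvm_vcpu_write_guest() call in the
new code):

	/*
	 * buf[512] covers the top of the state-save area.  A documented
	 * offset such as 0x7ef8 is relative to SMBASE + 0x8000, so it is
	 * stored at buf[0x7ef8 - 0x7e00] and, once the buffer is written
	 * out below, lands at guest physical address smbase + 0xfef8,
	 * i.e. smbase + 0x8000 + 0x7ef8.
	 */
	PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));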

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/smm.c | 235 +++++++++++++++++++++++++++++++
arch/x86/kvm/smm.h | 1 +
arch/x86/kvm/x86.c | 239 +-------------------------------
4 files changed, 239 insertions(+), 237 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0ca8f28854ab..b7d078cd768d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1838,6 +1838,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);

void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index b91c48d91f6e..26a6859e421f 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -5,6 +5,7 @@
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
+#include "cpuid.h"
#include "trace.h"

void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
@@ -35,3 +36,237 @@ void process_smi(struct kvm_vcpu *vcpu)
vcpu->arch.smi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
+
+static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
+{
+ u32 flags = 0;
+ flags |= seg->g << 23;
+ flags |= seg->db << 22;
+ flags |= seg->l << 21;
+ flags |= seg->avl << 20;
+ flags |= seg->present << 15;
+ flags |= seg->dpl << 13;
+ flags |= seg->s << 12;
+ flags |= seg->type << 8;
+ return flags;
+}
+
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+ struct kvm_segment seg;
+ int offset;
+
+ kvm_get_segment(vcpu, &seg, n);
+ PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ PUT_SMSTATE(u32, buf, offset + 8, seg.base);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+}
+
+#ifdef CONFIG_X86_64
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+ struct kvm_segment seg;
+ int offset;
+ u16 flags;
+
+ kvm_get_segment(vcpu, &seg, n);
+ offset = 0x7e00 + n * 16;
+
+ flags = enter_smm_get_segment_flags(&seg) >> 8;
+ PUT_SMSTATE(u16, buf, offset, seg.selector);
+ PUT_SMSTATE(u16, buf, offset + 2, flags);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u64, buf, offset + 8, seg.base);
+}
+#endif
+
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+{
+ struct desc_ptr dt;
+ struct kvm_segment seg;
+ unsigned long val;
+ int i;
+
+ PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+
+ for (i = 0; i < 8; i++)
+ PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+
+ kvm_get_dr(vcpu, 6, &val);
+ PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
+ kvm_get_dr(vcpu, 7, &val);
+ PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+ PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+ PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
+
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
+
+ for (i = 0; i < 6; i++)
+ enter_smm_save_seg_32(vcpu, buf, i);
+
+ PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+
+ /* revision id */
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
+ PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
+}
+
+#ifdef CONFIG_X86_64
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+{
+ struct desc_ptr dt;
+ struct kvm_segment seg;
+ unsigned long val;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+
+ PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+
+ kvm_get_dr(vcpu, 6, &val);
+ PUT_SMSTATE(u64, buf, 0x7f68, val);
+ kvm_get_dr(vcpu, 7, &val);
+ PUT_SMSTATE(u64, buf, 0x7f60, val);
+
+ PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+
+ PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
+
+ /* revision id */
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
+
+ PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+ PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
+
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+ PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
+
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
+
+ for (i = 0; i < 6; i++)
+ enter_smm_save_seg_64(vcpu, buf, i);
+}
+#endif
+
+void enter_smm(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs, ds;
+ struct desc_ptr dt;
+ unsigned long cr0;
+ char buf[512];
+
+ memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ enter_smm_save_state_64(vcpu, buf);
+ else
+#endif
+ enter_smm_save_state_32(vcpu, buf);
+
+ /*
+ * Give enter_smm() a chance to make ISA-specific changes to the vCPU
+ * state (e.g. leave guest mode) after we've saved the state into the
+ * SMM state-save area.
+ */
+ static_call(kvm_x86_enter_smm)(vcpu, buf);
+
+ kvm_smm_changed(vcpu, true);
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+ if (static_call(kvm_x86_get_nmi_mask)(vcpu))
+ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+ else
+ static_call(kvm_x86_set_nmi_mask)(vcpu, true);
+
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+ kvm_rip_write(vcpu, 0x8000);
+
+ cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+ static_call(kvm_x86_set_cr0)(vcpu, cr0);
+ vcpu->arch.cr0 = cr0;
+
+ static_call(kvm_x86_set_cr4)(vcpu, 0);
+
+ /* Undocumented: IDT limit is set to zero on entry to SMM. */
+ dt.address = dt.size = 0;
+ static_call(kvm_x86_set_idt)(vcpu, &dt);
+
+ kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+
+ cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+ cs.base = vcpu->arch.smbase;
+
+ ds.selector = 0;
+ ds.base = 0;
+
+ cs.limit = ds.limit = 0xffffffff;
+ cs.type = ds.type = 0x3;
+ cs.dpl = ds.dpl = 0;
+ cs.db = ds.db = 0;
+ cs.s = ds.s = 1;
+ cs.l = ds.l = 0;
+ cs.g = ds.g = 1;
+ cs.avl = ds.avl = 0;
+ cs.present = ds.present = 1;
+ cs.unusable = ds.unusable = 0;
+ cs.padding = ds.padding = 0;
+
+ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ static_call(kvm_x86_set_efer)(vcpu, 0);
+#endif
+
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_mmu_reset_context(vcpu);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index d85d4ccd32dd..aacc6dac2c99 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -20,6 +20,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
}

void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
+void enter_smm(struct kvm_vcpu *vcpu);
void process_smi(struct kvm_vcpu *vcpu);

#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e0e461958c81..476b4a6e81ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -120,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
-static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);
@@ -7043,8 +7042,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
return handled;
}

-static void kvm_set_segment(struct kvm_vcpu *vcpu,
- struct kvm_segment *var, int seg)
+void kvm_set_segment(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg)
{
static_call(kvm_x86_set_segment)(vcpu, var, seg);
}
@@ -9968,240 +9967,6 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}

-static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
-{
- u32 flags = 0;
- flags |= seg->g << 23;
- flags |= seg->db << 22;
- flags |= seg->l << 21;
- flags |= seg->avl << 20;
- flags |= seg->present << 15;
- flags |= seg->dpl << 13;
- flags |= seg->s << 12;
- flags |= seg->type << 8;
- return flags;
-}
-
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
-{
- struct kvm_segment seg;
- int offset;
-
- kvm_get_segment(vcpu, &seg, n);
- PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- PUT_SMSTATE(u32, buf, offset + 8, seg.base);
- PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
- PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
-}
-
-#ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
-{
- struct kvm_segment seg;
- int offset;
- u16 flags;
-
- kvm_get_segment(vcpu, &seg, n);
- offset = 0x7e00 + n * 16;
-
- flags = enter_smm_get_segment_flags(&seg) >> 8;
- PUT_SMSTATE(u16, buf, offset, seg.selector);
- PUT_SMSTATE(u16, buf, offset + 2, flags);
- PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
- PUT_SMSTATE(u64, buf, offset + 8, seg.base);
-}
-#endif
-
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
-{
- struct desc_ptr dt;
- struct kvm_segment seg;
- unsigned long val;
- int i;
-
- PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
-
- for (i = 0; i < 8; i++)
- PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
-
- kvm_get_dr(vcpu, 6, &val);
- PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
- kvm_get_dr(vcpu, 7, &val);
- PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
- PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
- PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
- PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
- PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
- PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
- PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
-
- static_call(kvm_x86_get_gdt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
- PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
-
- static_call(kvm_x86_get_idt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
- PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
-
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_32(vcpu, buf, i);
-
- PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
-
- /* revision id */
- PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
- PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
-}
-
-#ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
-{
- struct desc_ptr dt;
- struct kvm_segment seg;
- unsigned long val;
- int i;
-
- for (i = 0; i < 16; i++)
- PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
-
- PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
-
- kvm_get_dr(vcpu, 6, &val);
- PUT_SMSTATE(u64, buf, 0x7f68, val);
- kvm_get_dr(vcpu, 7, &val);
- PUT_SMSTATE(u64, buf, 0x7f60, val);
-
- PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
-
- PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
-
- /* revision id */
- PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
-
- PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
- PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
- PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
-
- static_call(kvm_x86_get_idt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
- PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
- PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
- PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
-
- static_call(kvm_x86_get_gdt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
- PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
-
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_64(vcpu, buf, i);
-}
-#endif
-
-static void enter_smm(struct kvm_vcpu *vcpu)
-{
- struct kvm_segment cs, ds;
- struct desc_ptr dt;
- unsigned long cr0;
- char buf[512];
-
- memset(buf, 0, 512);
-#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, buf);
- else
-#endif
- enter_smm_save_state_32(vcpu, buf);
-
- /*
- * Give enter_smm() a chance to make ISA-specific changes to the vCPU
- * state (e.g. leave guest mode) after we've saved the state into the
- * SMM state-save area.
- */
- static_call(kvm_x86_enter_smm)(vcpu, buf);
-
- kvm_smm_changed(vcpu, true);
- kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
-
- if (static_call(kvm_x86_get_nmi_mask)(vcpu))
- vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
- else
- static_call(kvm_x86_set_nmi_mask)(vcpu, true);
-
- kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
- kvm_rip_write(vcpu, 0x8000);
-
- cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
- static_call(kvm_x86_set_cr0)(vcpu, cr0);
- vcpu->arch.cr0 = cr0;
-
- static_call(kvm_x86_set_cr4)(vcpu, 0);
-
- /* Undocumented: IDT limit is set to zero on entry to SMM. */
- dt.address = dt.size = 0;
- static_call(kvm_x86_set_idt)(vcpu, &dt);
-
- kvm_set_dr(vcpu, 7, DR7_FIXED_1);
-
- cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
- cs.base = vcpu->arch.smbase;
-
- ds.selector = 0;
- ds.base = 0;
-
- cs.limit = ds.limit = 0xffffffff;
- cs.type = ds.type = 0x3;
- cs.dpl = ds.dpl = 0;
- cs.db = ds.db = 0;
- cs.s = ds.s = 1;
- cs.l = ds.l = 0;
- cs.g = ds.g = 1;
- cs.avl = ds.avl = 0;
- cs.present = ds.present = 1;
- cs.unusable = ds.unusable = 0;
- cs.padding = ds.padding = 0;
-
- kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
-
-#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- static_call(kvm_x86_set_efer)(vcpu, 0);
-#endif
-
- kvm_update_cpuid_runtime(vcpu);
- kvm_mmu_reset_context(vcpu);
-}
-
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
unsigned long *vcpu_bitmap)
{
--
2.31.1


2022-09-29 17:54:26

by Paolo Bonzini

Subject: [PATCH v2 3/8] KVM: x86: move SMM exit to a new file

Some users of KVM implement the UEFI variable store through a paravirtual
device that does not require the "SMM lockbox" component of edk2, and
would like to compile out system management mode. In preparation for
that, move the SMM exit code out of emulate.c and into a new file.

The code is still written as a series of invocations of the emulator
callbacks, but the two exiting_smm and leave_smm callbacks are merged
into one, and all the code from em_rsm is now part of the callback.
This removes all knowledge of the format of the SMM save state area
from the emulator. Further patches will clean up the code and
invoke KVM's own functions to access control registers, descriptor
caches, etc.
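
Roughly, the shape of the RSM path changes as follows (a sketch of the call
flow, not a literal quote of the code):

	/* before: emulate.c owns the save-area parsing */
	em_rsm()
	  -> ctxt->ops->exiting_smm()
	  -> ctxt->ops->leave_smm()                    /* vendor-specific */
	  -> rsm_load_state_32()/rsm_load_state_64()   /* in emulate.c */

	/* after: a single callback, implemented in smm.c, does everything */
	em_rsm()
	  -> ctxt->ops->leave_smm()                    /* emulator_leave_smm() */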

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/emulate.c | 356 +------------------------------------
arch/x86/kvm/kvm_emulate.h | 34 +++-
arch/x86/kvm/smm.c | 316 ++++++++++++++++++++++++++++++++
arch/x86/kvm/smm.h | 1 +
arch/x86/kvm/x86.c | 14 --
5 files changed, 351 insertions(+), 370 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5208a13e40e0..af3f70b52f85 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,7 +30,6 @@
#include "tss.h"
#include "mmu.h"
#include "pmu.h"
-#include "smm.h"

/*
* Operand types
@@ -243,37 +242,6 @@ enum x86_transfer_type {
X86_TRANSFER_TASK_SWITCH,
};

-static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
- if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
- nr &= NR_EMULATOR_GPRS - 1;
-
- if (!(ctxt->regs_valid & (1 << nr))) {
- ctxt->regs_valid |= 1 << nr;
- ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
- }
- return ctxt->_regs[nr];
-}
-
-static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
- if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
- nr &= NR_EMULATOR_GPRS - 1;
-
- BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
- BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
-
- ctxt->regs_valid |= 1 << nr;
- ctxt->regs_dirty |= 1 << nr;
- return &ctxt->_regs[nr];
-}
-
-static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
- reg_read(ctxt, nr);
- return reg_write(ctxt, nr);
-}
-
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
unsigned long dirty = ctxt->regs_dirty;
@@ -2310,334 +2278,14 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
return rc;
}

-static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
-{
-#ifdef CONFIG_X86_64
- return ctxt->ops->guest_has_long_mode(ctxt);
-#else
- return false;
-#endif
-}
-
-static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
-{
- desc->g = (flags >> 23) & 1;
- desc->d = (flags >> 22) & 1;
- desc->l = (flags >> 21) & 1;
- desc->avl = (flags >> 20) & 1;
- desc->p = (flags >> 15) & 1;
- desc->dpl = (flags >> 13) & 3;
- desc->s = (flags >> 12) & 1;
- desc->type = (flags >> 8) & 15;
-}
-
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
- int n)
-{
- struct desc_struct desc;
- int offset;
- u16 selector;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
- return X86EMUL_CONTINUE;
-}
-
-#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
- int n)
-{
- struct desc_struct desc;
- int offset;
- u16 selector;
- u32 base3;
-
- offset = 0x7e00 + n * 16;
-
- selector = GET_SMSTATE(u16, smstate, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- base3 = GET_SMSTATE(u32, smstate, offset + 12);
-
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
- return X86EMUL_CONTINUE;
-}
-#endif
-
-static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr3, u64 cr4)
-{
- int bad;
- u64 pcid;
-
- /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
- pcid = 0;
- if (cr4 & X86_CR4_PCIDE) {
- pcid = cr3 & 0xfff;
- cr3 &= ~0xfff;
- }
-
- bad = ctxt->ops->set_cr(ctxt, 3, cr3);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
-
- /*
- * First enable PAE, long mode needs it before CR0.PG = 1 is set.
- * Then enable protected mode. However, PCID cannot be enabled
- * if EFER.LMA=0, so set it separately.
- */
- bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
-
- bad = ctxt->ops->set_cr(ctxt, 0, cr0);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
-
- if (cr4 & X86_CR4_PCIDE) {
- bad = ctxt->ops->set_cr(ctxt, 4, cr4);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
- if (pcid) {
- bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
- }
-
- }
-
- return X86EMUL_CONTINUE;
-}
-
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
-{
- struct desc_struct desc;
- struct desc_ptr dt;
- u16 selector;
- u32 val, cr0, cr3, cr4;
- int i;
-
- cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
-
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
- val = GET_SMSTATE(u32, smstate, 0x7fcc);
-
- if (ctxt->ops->set_dr(ctxt, 6, val))
- return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
- return X86EMUL_UNHANDLEABLE;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
-
- selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
-
- dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
- ctxt->ops->set_gdt(ctxt, &dt);
-
- dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
- ctxt->ops->set_idt(ctxt, &dt);
-
- for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
-
- cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
-
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
-
- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-}
-
-#ifdef CONFIG_X86_64
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
-{
- struct desc_struct desc;
- struct desc_ptr dt;
- u64 val, cr0, cr3, cr4;
- u32 base3;
- u16 selector;
- int i, r;
-
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
-
- ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
-
- val = GET_SMSTATE(u64, smstate, 0x7f68);
-
- if (ctxt->ops->set_dr(ctxt, 6, val))
- return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u64, smstate, 0x7f60);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
- return X86EMUL_UNHANDLEABLE;
-
- cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
- cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
- cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
- val = GET_SMSTATE(u64, smstate, 0x7ed0);
-
- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
- return X86EMUL_UNHANDLEABLE;
-
- selector = GET_SMSTATE(u32, smstate, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
- base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
-
- dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
- ctxt->ops->set_idt(ctxt, &dt);
-
- selector = GET_SMSTATE(u32, smstate, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
- base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
-
- dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
- ctxt->ops->set_gdt(ctxt, &dt);
-
- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
- if (r != X86EMUL_CONTINUE)
- return r;
-
- for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
-
- return X86EMUL_CONTINUE;
-}
-#endif
-
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
- unsigned long cr0, cr4, efer;
- char buf[512];
- u64 smbase;
- int ret;
-
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);

- smbase = ctxt->ops->get_smbase(ctxt);
-
- ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
- if (ret != X86EMUL_CONTINUE)
- return X86EMUL_UNHANDLEABLE;
-
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
-
- ctxt->ops->exiting_smm(ctxt);
-
- /*
- * Get back to real mode, to prepare a safe state in which to load
- * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
- * supports long mode.
- */
- if (emulator_has_longmode(ctxt)) {
- struct desc_struct cs_desc;
-
- /* Zero CR4.PCIDE before CR0.PG. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
- if (cr4 & X86_CR4_PCIDE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
-
- /* A 32-bit code segment is required to clear EFER.LMA. */
- memset(&cs_desc, 0, sizeof(cs_desc));
- cs_desc.type = 0xb;
- cs_desc.s = cs_desc.g = cs_desc.p = 1;
- ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
- }
-
- /* For the 64-bit case, this will clear EFER.LMA. */
- cr0 = ctxt->ops->get_cr(ctxt, 0);
- if (cr0 & X86_CR0_PE)
- ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-
- if (emulator_has_longmode(ctxt)) {
- /* Clear CR4.PAE before clearing EFER.LME. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
- if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
- /* And finally go back to 32-bit mode. */
- efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
- }
-
- /*
- * Give leave_smm() a chance to make ISA-specific changes to the vCPU
- * state (e.g. enter guest mode) before loading state from the SMM
- * state-save area.
- */
- if (ctxt->ops->leave_smm(ctxt, buf))
- goto emulate_shutdown;
-
-#ifdef CONFIG_X86_64
- if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, buf);
- else
-#endif
- ret = rsm_load_state_32(ctxt, buf);
-
- if (ret != X86EMUL_CONTINUE)
- goto emulate_shutdown;
-
- /*
- * Note, the ctxt->ops callbacks are responsible for handling side
- * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
- * runtime updates, etc... If that changes, e.g. this flow is moved
- * out of the emulator to make it look more like enter_smm(), then
- * those side effects need to be explicitly handled for both success
- * and shutdown.
- */
- return X86EMUL_CONTINUE;
+ if (ctxt->ops->leave_smm(ctxt))
+ ctxt->ops->triple_fault(ctxt);

-emulate_shutdown:
- ctxt->ops->triple_fault(ctxt);
return X86EMUL_CONTINUE;
}

diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 89246446d6aa..d7afbc448dd2 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -234,8 +234,7 @@ struct x86_emulate_ops {
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);

unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
- void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
- int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
+ int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
};
@@ -526,4 +525,35 @@ void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt);

+static inline ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
+ nr &= NR_EMULATOR_GPRS - 1;
+
+ if (!(ctxt->regs_valid & (1 << nr))) {
+ ctxt->regs_valid |= 1 << nr;
+ ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+ }
+ return ctxt->_regs[nr];
+}
+
+static inline ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
+ nr &= NR_EMULATOR_GPRS - 1;
+
+ BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
+ BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
+
+ ctxt->regs_valid |= 1 << nr;
+ ctxt->regs_dirty |= 1 << nr;
+ return &ctxt->_regs[nr];
+}
+
+static inline ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ reg_read(ctxt, nr);
+ return reg_write(ctxt, nr);
+}
+
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 26a6859e421f..773e07b6397d 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -270,3 +270,319 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_update_cpuid_runtime(vcpu);
kvm_mmu_reset_context(vcpu);
}
+
+static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
+{
+#ifdef CONFIG_X86_64
+ return ctxt->ops->guest_has_long_mode(ctxt);
+#else
+ return false;
+#endif
+}
+
+static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+{
+ desc->g = (flags >> 23) & 1;
+ desc->d = (flags >> 22) & 1;
+ desc->l = (flags >> 21) & 1;
+ desc->avl = (flags >> 20) & 1;
+ desc->p = (flags >> 15) & 1;
+ desc->dpl = (flags >> 13) & 3;
+ desc->s = (flags >> 12) & 1;
+ desc->type = (flags >> 8) & 15;
+}
+
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+
+ selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+ return X86EMUL_CONTINUE;
+}
+
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+ u32 base3;
+
+ offset = 0x7e00 + n * 16;
+
+ selector = GET_SMSTATE(u16, smstate, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ base3 = GET_SMSTATE(u32, smstate, offset + 12);
+
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+ return X86EMUL_CONTINUE;
+}
+#endif
+
+static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+ u64 cr0, u64 cr3, u64 cr4)
+{
+ int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ /*
+ * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+ * Then enable protected mode. However, PCID cannot be enabled
+ * if EFER.LMA=0, so set it separately.
+ */
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ if (cr4 & X86_CR4_PCIDE) {
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u16 selector;
+ u32 val, cr0, cr3, cr4;
+ int i;
+
+ cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
+ cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
+
+ for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
+
+ val = GET_SMSTATE(u32, smstate, 0x7fcc);
+
+ if (ctxt->ops->set_dr(ctxt, 6, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ val = GET_SMSTATE(u32, smstate, 0x7fc8);
+
+ if (ctxt->ops->set_dr(ctxt, 7, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+
+ selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ for (i = 0; i < 6; i++) {
+ int r = rsm_load_seg_32(ctxt, smstate, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+}
+
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u64 val, cr0, cr3, cr4;
+ u32 base3;
+ u16 selector;
+ int i, r;
+
+ for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
+
+ ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+
+ val = GET_SMSTATE(u64, smstate, 0x7f68);
+
+ if (ctxt->ops->set_dr(ctxt, 6, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ val = GET_SMSTATE(u64, smstate, 0x7f60);
+
+ if (ctxt->ops->set_dr(ctxt, 7, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
+ cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ val = GET_SMSTATE(u64, smstate, 0x7ed0);
+
+ if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ return X86EMUL_UNHANDLEABLE;
+
+ selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
+ for (i = 0; i < 6; i++) {
+ r = rsm_load_seg_64(ctxt, smstate, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+#endif
+
+int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ unsigned long cr0, cr4, efer;
+ char buf[512];
+ u64 smbase;
+ int ret;
+
+ smbase = ctxt->ops->get_smbase(ctxt);
+
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ kvm_smm_changed(vcpu, false);
+
+ /*
+ * Get back to real mode, to prepare a safe state in which to load
+ * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
+ * supports long mode.
+ *
+	 * The ctxt->ops callbacks will handle all side effects when
+	 * writing MSRs and CRs, e.g. MMU context resets, CPUID
+	 * runtime updates, etc.
+ */
+ if (emulator_has_longmode(ctxt)) {
+ struct desc_struct cs_desc;
+
+ /* Zero CR4.PCIDE before CR0.PG. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PCIDE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+
+ /* A 32-bit code segment is required to clear EFER.LMA. */
+ memset(&cs_desc, 0, sizeof(cs_desc));
+ cs_desc.type = 0xb;
+ cs_desc.s = cs_desc.g = cs_desc.p = 1;
+ ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+ }
+
+ /* For the 64-bit case, this will clear EFER.LMA. */
+ cr0 = ctxt->ops->get_cr(ctxt, 0);
+ if (cr0 & X86_CR0_PE)
+ ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+
+ if (emulator_has_longmode(ctxt)) {
+ /* Clear CR4.PAE before clearing EFER.LME. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+ /* And finally go back to 32-bit mode. */
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ }
+
+ /*
+ * Give leave_smm() a chance to make ISA-specific changes to the vCPU
+ * state (e.g. enter guest mode) before loading state from the SMM
+ * state-save area.
+ */
+ if (static_call(kvm_x86_leave_smm)(vcpu, buf))
+ return X86EMUL_UNHANDLEABLE;
+
+#ifdef CONFIG_X86_64
+ if (emulator_has_longmode(ctxt))
+ return rsm_load_state_64(ctxt, buf);
+ else
+#endif
+ return rsm_load_state_32(ctxt, buf);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index aacc6dac2c99..b0602a92e511 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -21,6 +21,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)

void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
void enter_smm(struct kvm_vcpu *vcpu);
+int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
void process_smi(struct kvm_vcpu *vcpu);

#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 476b4a6e81ab..97d6ee179109 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8095,19 +8095,6 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
return emul_to_vcpu(ctxt)->arch.hflags;
}

-static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
- kvm_smm_changed(vcpu, false);
-}
-
-static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
-{
- return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
-}
-
static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
{
kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
@@ -8171,7 +8158,6 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_has_rdpid = emulator_guest_has_rdpid,
.set_nmi_mask = emulator_set_nmi_mask,
.get_hflags = emulator_get_hflags,
- .exiting_smm = emulator_exiting_smm,
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
--
2.31.1


2022-09-29 17:55:27

by Paolo Bonzini

[permalink] [raw]
Subject: [PATCH v2 1/8] KVM: x86: start moving SMM-related functions to new files

Create a new header and source with code related to system management
mode emulation. Entry and exit will move there too; for now,
opportunistically rename put_smstate to PUT_SMSTATE while moving
it to smm.h, and adjust the SMM state saving code.
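
As a quick illustration (a sketch only), the renamed macro is used exactly as
before; for example, storing CR0 into the 512-byte state-save buffer at its
32-bit layout offset:

	char buf[512];

	/* buf covers SMRAM offsets 0x7e00..0x7fff; CR0 lives at 0x7ffc. */
	PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));

The uppercase spelling makes it clearer at the call sites that this is a macro
writing through a cast pointer rather than a function call.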

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 6 --
arch/x86/kvm/Makefile | 1 +
arch/x86/kvm/emulate.c | 1 +
arch/x86/kvm/kvm_cache_regs.h | 5 --
arch/x86/kvm/lapic.c | 14 ++-
arch/x86/kvm/lapic.h | 7 +-
arch/x86/kvm/mmu/mmu.c | 1 +
arch/x86/kvm/smm.c | 37 ++++++++
arch/x86/kvm/smm.h | 25 ++++++
arch/x86/kvm/svm/nested.c | 1 +
arch/x86/kvm/svm/svm.c | 5 +-
arch/x86/kvm/vmx/nested.c | 1 +
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 148 ++++++++++++--------------------
14 files changed, 138 insertions(+), 115 deletions(-)
create mode 100644 arch/x86/kvm/smm.c
create mode 100644 arch/x86/kvm/smm.h

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2e325944872c..0ca8f28854ab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2083,12 +2083,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}

-#define put_smstate(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
-
-#define GET_SMSTATE(type, buf, offset) \
- (*(type *)((buf) + (offset) - 0x7e00))
-
int kvm_cpu_dirty_log_size(void);

int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 30f244b64523..ec6f7656254b 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,6 +20,7 @@ endif

kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o
+kvm-y += smm.o

kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b6180032dfd6..5208a13e40e0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,6 +30,7 @@
#include "tss.h"
#include "mmu.h"
#include "pmu.h"
+#include "smm.h"

/*
* Operand types
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 3febc342360c..c09174f73a34 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -200,9 +200,4 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
return vcpu->arch.hflags & HF_GUEST_MASK;
}

-static inline bool is_smm(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hflags & HF_SMM_MASK;
-}
-
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d7639d126e6c..e636d8c681f4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -42,6 +42,7 @@
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"
+#include "smm.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -1170,9 +1171,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
break;

case APIC_DM_SMI:
- result = 1;
- kvm_make_request(KVM_REQ_SMI, vcpu);
- kvm_vcpu_kick(vcpu);
+ if (!kvm_inject_smi(vcpu)) {
+ kvm_vcpu_kick(vcpu);
+ result = 1;
+ }
break;

case APIC_DM_NMI:
@@ -3020,6 +3022,12 @@ int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
return 0;
}

+bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
+{
+ return !is_smm(vcpu) &&
+ !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
+}
+
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a5ac4a5a5179..cb7e68c93e1a 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -7,7 +7,6 @@
#include <linux/kvm_host.h>

#include "hyperv.h"
-#include "kvm_cache_regs.h"

#define KVM_APIC_INIT 0
#define KVM_APIC_SIPI 1
@@ -229,11 +228,7 @@ static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events;
}

-static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
-{
- return !is_smm(vcpu) &&
- !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
-}
+bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu);

static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
{
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 40feb5ec761e..04927a49fb69 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -22,6 +22,7 @@
#include "tdp_mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
+#include "smm.h"
#include "kvm_emulate.h"
#include "cpuid.h"
#include "spte.h"
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
new file mode 100644
index 000000000000..b91c48d91f6e
--- /dev/null
+++ b/arch/x86/kvm/smm.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/kvm_host.h>
+#include "x86.h"
+#include "kvm_cache_regs.h"
+#include "kvm_emulate.h"
+#include "smm.h"
+#include "trace.h"
+
+void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
+{
+ trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
+
+ if (entering_smm) {
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ } else {
+ vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
+
+ /* Process a latched INIT or SMI, if any. */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ /*
+ * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
+ * on SMM exit we still need to reload them from
+ * guest memory
+ */
+ vcpu->arch.pdptrs_from_userspace = false;
+ }
+
+ kvm_mmu_reset_context(vcpu);
+}
+
+void process_smi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.smi_pending = true;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
new file mode 100644
index 000000000000..d85d4ccd32dd
--- /dev/null
+++ b/arch/x86/kvm/smm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_KVM_SMM_H
+#define ASM_KVM_SMM_H
+
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
+#define PUT_SMSTATE(type, buf, offset, val) \
+ *(type *)((buf) + (offset) - 0x7e00) = val
+
+static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(KVM_REQ_SMI, vcpu);
+ return 0;
+}
+
+static inline bool is_smm(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.hflags & HF_SMM_MASK;
+}
+
+void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
+void process_smi(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 4c620999d230..cc0fd75f7cba 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,7 @@
#include "trace.h"
#include "mmu.h"
#include "x86.h"
+#include "smm.h"
#include "cpuid.h"
#include "lapic.h"
#include "svm.h"
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 58f0077d9357..496ee7d1ae2f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -6,6 +6,7 @@
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
+#include "smm.h"
#include "cpuid.h"
#include "pmu.h"

@@ -4442,9 +4443,9 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;

/* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1);
+ PUT_SMSTATE(u64, smstate, 0x7ed8, 1);
/* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);

svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 8f67a9c4a287..29215925e75b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -16,6 +16,7 @@
#include "trace.h"
#include "vmx.h"
#include "x86.h"
+#include "smm.h"

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9dba04b6b019..038809c68006 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -66,6 +66,7 @@
#include "vmcs12.h"
#include "vmx.h"
#include "x86.h"
+#include "smm.h"

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eb9d2c23fb04..e0e461958c81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -30,6 +30,7 @@
#include "hyperv.h"
#include "lapic.h"
#include "xen.h"
+#include "smm.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
@@ -119,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
-static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
@@ -4883,13 +4883,6 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
return 0;
}

-static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
-{
- kvm_make_request(KVM_REQ_SMI, vcpu);
-
- return 0;
-}
-
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@@ -5112,8 +5105,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
memset(&events->reserved, 0, sizeof(events->reserved));
}

-static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
-
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
@@ -5566,7 +5557,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SMI: {
- r = kvm_vcpu_ioctl_smi(vcpu);
+ r = kvm_inject_smi(vcpu);
break;
}
case KVM_SET_CPUID: {
@@ -8514,29 +8505,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
static int complete_emulated_pio(struct kvm_vcpu *vcpu);

-static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
-{
- trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
-
- if (entering_smm) {
- vcpu->arch.hflags |= HF_SMM_MASK;
- } else {
- vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
-
- /* Process a latched INIT or SMI, if any. */
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
- /*
- * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
- * on SMM exit we still need to reload them from
- * guest memory
- */
- vcpu->arch.pdptrs_from_userspace = false;
- }
-
- kvm_mmu_reset_context(vcpu);
-}
-
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
@@ -10020,16 +9988,16 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
int offset;

kvm_get_segment(vcpu, &seg, n);
- put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);

if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;

- put_smstate(u32, buf, offset + 8, seg.base);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+ PUT_SMSTATE(u32, buf, offset + 8, seg.base);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
}

#ifdef CONFIG_X86_64
@@ -10043,10 +10011,10 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
offset = 0x7e00 + n * 16;

flags = enter_smm_get_segment_flags(&seg) >> 8;
- put_smstate(u16, buf, offset, seg.selector);
- put_smstate(u16, buf, offset + 2, flags);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u64, buf, offset + 8, seg.base);
+ PUT_SMSTATE(u16, buf, offset, seg.selector);
+ PUT_SMSTATE(u16, buf, offset + 2, flags);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u64, buf, offset + 8, seg.base);
}
#endif

@@ -10057,47 +10025,47 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
unsigned long val;
int i;

- put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));

for (i = 0; i < 8; i++)
- put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+ PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));

kvm_get_dr(vcpu, 6, &val);
- put_smstate(u32, buf, 0x7fcc, (u32)val);
+ PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u32, buf, 0x7fc8, (u32)val);
+ PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);

kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u32, buf, 0x7fc4, seg.selector);
- put_smstate(u32, buf, 0x7f64, seg.base);
- put_smstate(u32, buf, 0x7f60, seg.limit);
- put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+ PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));

kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u32, buf, 0x7fc0, seg.selector);
- put_smstate(u32, buf, 0x7f80, seg.base);
- put_smstate(u32, buf, 0x7f7c, seg.limit);
- put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+ PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));

static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f74, dt.address);
- put_smstate(u32, buf, 0x7f70, dt.size);
+ PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f70, dt.size);

static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f58, dt.address);
- put_smstate(u32, buf, 0x7f54, dt.size);
+ PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f54, dt.size);

for (i = 0; i < 6; i++)
enter_smm_save_seg_32(vcpu, buf, i);

- put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));

/* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020000);
- put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
+ PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
}

#ifdef CONFIG_X86_64
@@ -10109,46 +10077,46 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
int i;

for (i = 0; i < 16; i++)
- put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+ PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));

- put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));

kvm_get_dr(vcpu, 6, &val);
- put_smstate(u64, buf, 0x7f68, val);
+ PUT_SMSTATE(u64, buf, 0x7f68, val);
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u64, buf, 0x7f60, val);
+ PUT_SMSTATE(u64, buf, 0x7f60, val);

- put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));

- put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+ PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);

/* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020064);
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);

- put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+ PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);

kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u16, buf, 0x7e90, seg.selector);
- put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e94, seg.limit);
- put_smstate(u64, buf, 0x7e98, seg.base);
+ PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e98, seg.base);

static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e84, dt.size);
- put_smstate(u64, buf, 0x7e88, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e88, dt.address);

kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u16, buf, 0x7e70, seg.selector);
- put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e74, seg.limit);
- put_smstate(u64, buf, 0x7e78, seg.base);
+ PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e78, seg.base);

static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e64, dt.size);
- put_smstate(u64, buf, 0x7e68, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e68, dt.address);

for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
@@ -10234,12 +10202,6 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}

-static void process_smi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.smi_pending = true;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-}
-
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
unsigned long *vcpu_bitmap)
{
--
2.31.1


2022-09-29 17:56:20

by Paolo Bonzini

[permalink] [raw]
Subject: [PATCH v2 7/8] KVM: x86: remove SMRAM address space if SMM is not supported

If CONFIG_KVM_SMM is not defined, HF_SMM_MASK will always be zero, and
we can spare userspace the hassle of setting up the SMRAM address space
simply by reporting that only one address space is supported.
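
This works because the generic code already falls back to a single address
space when an architecture does not define these macros; roughly (quoting
include/linux/kvm_host.h from memory, so the exact text may differ):

	#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
	static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
	{
		return 0;
	}
	#endif

	#ifndef KVM_ADDRESS_SPACE_NUM
	#define KVM_ADDRESS_SPACE_NUM	1
	#endif

so with the x86 definitions compiled out, KVM_CAP_MULTI_ADDRESS_SPACE should
report a single address space to userspace.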

Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cb88da02d965..d11697504471 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1994,11 +1994,14 @@ enum {
#define HF_SMM_MASK (1 << 6)
#define HF_SMM_INSIDE_NMI_MASK (1 << 7)

-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#ifdef CONFIG_KVM_SMM
+# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+# define KVM_ADDRESS_SPACE_NUM 2
+# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#else
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
+#endif

#define KVM_ARCH_WANT_MMU_NOTIFIER

--
2.31.1


2022-10-01 13:30:10

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] KVM: x86: do not go through ctxt->ops when emulating rsm

Hi Paolo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on kvm/queue]
[also build test WARNING on next-20220930]
[cannot apply to mst-vhost/linux-next linus/master v6.0-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Paolo-Bonzini/KVM-x86-allow-compiling-out-SMM-support/20220930-012220
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
config: i386-randconfig-a001-20220926
compiler: gcc-11 (Debian 11.3.0-5) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/cc73a769dae68cad03e5f075d8b09aac79cd0125
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Paolo-Bonzini/KVM-x86-allow-compiling-out-SMM-support/20220930-012220
git checkout cc73a769dae68cad03e5f075d8b09aac79cd0125
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash arch/x86/kvm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

arch/x86/kvm/smm.c: In function 'emulator_leave_smm':
>> arch/x86/kvm/smm.c:503:33: warning: unused variable 'efer' [-Wunused-variable]
503 | unsigned long cr0, cr4, efer;
| ^~~~
>> arch/x86/kvm/smm.c:503:28: warning: unused variable 'cr4' [-Wunused-variable]
503 | unsigned long cr0, cr4, efer;
| ^~~


vim +/efer +503 arch/x86/kvm/smm.c

cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 499
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 500 int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 501 {
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 502 struct kvm_vcpu *vcpu = ctxt->vcpu;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 @503 unsigned long cr0, cr4, efer;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 504 char buf[512];
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 505 u64 smbase;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 506 int ret;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 507
cc73a769dae68c Paolo Bonzini 2022-09-29 508 smbase = vcpu->arch.smbase;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 509
cc73a769dae68c Paolo Bonzini 2022-09-29 510 ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
cc73a769dae68c Paolo Bonzini 2022-09-29 511 if (ret < 0)
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 512 return X86EMUL_UNHANDLEABLE;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 513
cc73a769dae68c Paolo Bonzini 2022-09-29 514 if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
cc73a769dae68c Paolo Bonzini 2022-09-29 515 static_call(kvm_x86_set_nmi_mask)(vcpu, false);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 516
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 517 kvm_smm_changed(vcpu, false);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 518
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 519 /*
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 520 * Get back to real mode, to prepare a safe state in which to load
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 521 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 522 * supports long mode.
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 523 */
cc73a769dae68c Paolo Bonzini 2022-09-29 524 #ifdef CONFIG_X86_64
cc73a769dae68c Paolo Bonzini 2022-09-29 525 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
cc73a769dae68c Paolo Bonzini 2022-09-29 526 struct kvm_segment cs_desc;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 527
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 528 /* Zero CR4.PCIDE before CR0.PG. */
cc73a769dae68c Paolo Bonzini 2022-09-29 529 cr4 = kvm_read_cr4(vcpu);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 530 if (cr4 & X86_CR4_PCIDE)
cc73a769dae68c Paolo Bonzini 2022-09-29 531 kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 532
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 533 /* A 32-bit code segment is required to clear EFER.LMA. */
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 534 memset(&cs_desc, 0, sizeof(cs_desc));
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 535 cs_desc.type = 0xb;
cc73a769dae68c Paolo Bonzini 2022-09-29 536 cs_desc.s = cs_desc.g = cs_desc.present = 1;
cc73a769dae68c Paolo Bonzini 2022-09-29 537 kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 538 }
cc73a769dae68c Paolo Bonzini 2022-09-29 539 #endif
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 540
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 541 /* For the 64-bit case, this will clear EFER.LMA. */
cc73a769dae68c Paolo Bonzini 2022-09-29 542 cr0 = kvm_read_cr0(vcpu);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 543 if (cr0 & X86_CR0_PE)
cc73a769dae68c Paolo Bonzini 2022-09-29 544 kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 545

--
0-DAY CI Kernel Test Service
https://01.org/lkp



2022-10-01 14:27:19

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] KVM: x86: do not go through ctxt->ops when emulating rsm

Hi Paolo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on kvm/queue]
[also build test WARNING on next-20220930]
[cannot apply to mst-vhost/linux-next linus/master v6.0-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Paolo-Bonzini/KVM-x86-allow-compiling-out-SMM-support/20220930-012220
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
config: i386-randconfig-a013-20220926
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/cc73a769dae68cad03e5f075d8b09aac79cd0125
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Paolo-Bonzini/KVM-x86-allow-compiling-out-SMM-support/20220930-012220
git checkout cc73a769dae68cad03e5f075d8b09aac79cd0125
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash arch/x86/kvm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

>> arch/x86/kvm/smm.c:503:21: warning: unused variable 'cr4' [-Wunused-variable]
unsigned long cr0, cr4, efer;
^
>> arch/x86/kvm/smm.c:503:26: warning: unused variable 'efer' [-Wunused-variable]
unsigned long cr0, cr4, efer;
^
2 warnings generated.


vim +/cr4 +503 arch/x86/kvm/smm.c

cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 499
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 500 int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 501 {
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 502 struct kvm_vcpu *vcpu = ctxt->vcpu;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 @503 unsigned long cr0, cr4, efer;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 504 char buf[512];
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 505 u64 smbase;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 506 int ret;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 507
cc73a769dae68c Paolo Bonzini 2022-09-29 508 smbase = vcpu->arch.smbase;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 509
cc73a769dae68c Paolo Bonzini 2022-09-29 510 ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
cc73a769dae68c Paolo Bonzini 2022-09-29 511 if (ret < 0)
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 512 return X86EMUL_UNHANDLEABLE;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 513
cc73a769dae68c Paolo Bonzini 2022-09-29 514 if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
cc73a769dae68c Paolo Bonzini 2022-09-29 515 static_call(kvm_x86_set_nmi_mask)(vcpu, false);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 516
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 517 kvm_smm_changed(vcpu, false);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 518
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 519 /*
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 520 * Get back to real mode, to prepare a safe state in which to load
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 521 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 522 * supports long mode.
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 523 */
cc73a769dae68c Paolo Bonzini 2022-09-29 524 #ifdef CONFIG_X86_64
cc73a769dae68c Paolo Bonzini 2022-09-29 525 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
cc73a769dae68c Paolo Bonzini 2022-09-29 526 struct kvm_segment cs_desc;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 527
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 528 /* Zero CR4.PCIDE before CR0.PG. */
cc73a769dae68c Paolo Bonzini 2022-09-29 529 cr4 = kvm_read_cr4(vcpu);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 530 if (cr4 & X86_CR4_PCIDE)
cc73a769dae68c Paolo Bonzini 2022-09-29 531 kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 532
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 533 /* A 32-bit code segment is required to clear EFER.LMA. */
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 534 memset(&cs_desc, 0, sizeof(cs_desc));
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 535 cs_desc.type = 0xb;
cc73a769dae68c Paolo Bonzini 2022-09-29 536 cs_desc.s = cs_desc.g = cs_desc.present = 1;
cc73a769dae68c Paolo Bonzini 2022-09-29 537 kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 538 }
cc73a769dae68c Paolo Bonzini 2022-09-29 539 #endif
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 540
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 541 /* For the 64-bit case, this will clear EFER.LMA. */
cc73a769dae68c Paolo Bonzini 2022-09-29 542 cr0 = kvm_read_cr0(vcpu);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 543 if (cr0 & X86_CR0_PE)
cc73a769dae68c Paolo Bonzini 2022-09-29 544 kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 545

--
0-DAY CI Kernel Test Service
https://01.org/lkp



2022-10-02 03:12:52

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] KVM: x86: do not go through ctxt->ops when emulating rsm

Hi Paolo,

I love your patch! Yet something to improve:

[auto build test ERROR on kvm/queue]
[also build test ERROR on next-20220930]
[cannot apply to mst-vhost/linux-next linus/master v6.0-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Paolo-Bonzini/KVM-x86-allow-compiling-out-SMM-support/20220930-012220
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
config: i386-randconfig-a002
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/cc73a769dae68cad03e5f075d8b09aac79cd0125
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Paolo-Bonzini/KVM-x86-allow-compiling-out-SMM-support/20220930-012220
git checkout cc73a769dae68cad03e5f075d8b09aac79cd0125
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> arch/x86/kvm/smm.c:503:21: error: unused variable 'cr4' [-Werror,-Wunused-variable]
unsigned long cr0, cr4, efer;
^
>> arch/x86/kvm/smm.c:503:26: error: unused variable 'efer' [-Werror,-Wunused-variable]
unsigned long cr0, cr4, efer;
^
2 errors generated.


vim +/cr4 +503 arch/x86/kvm/smm.c

cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 499
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 500 int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 501 {
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 502 struct kvm_vcpu *vcpu = ctxt->vcpu;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 @503 unsigned long cr0, cr4, efer;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 504 char buf[512];
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 505 u64 smbase;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 506 int ret;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 507
cc73a769dae68c Paolo Bonzini 2022-09-29 508 smbase = vcpu->arch.smbase;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 509
cc73a769dae68c Paolo Bonzini 2022-09-29 510 ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
cc73a769dae68c Paolo Bonzini 2022-09-29 511 if (ret < 0)
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 512 return X86EMUL_UNHANDLEABLE;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 513
cc73a769dae68c Paolo Bonzini 2022-09-29 514 if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
cc73a769dae68c Paolo Bonzini 2022-09-29 515 static_call(kvm_x86_set_nmi_mask)(vcpu, false);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 516
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 517 kvm_smm_changed(vcpu, false);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 518
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 519 /*
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 520 * Get back to real mode, to prepare a safe state in which to load
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 521 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 522 * supports long mode.
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 523 */
cc73a769dae68c Paolo Bonzini 2022-09-29 524 #ifdef CONFIG_X86_64
cc73a769dae68c Paolo Bonzini 2022-09-29 525 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
cc73a769dae68c Paolo Bonzini 2022-09-29 526 struct kvm_segment cs_desc;
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 527
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 528 /* Zero CR4.PCIDE before CR0.PG. */
cc73a769dae68c Paolo Bonzini 2022-09-29 529 cr4 = kvm_read_cr4(vcpu);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 530 if (cr4 & X86_CR4_PCIDE)
cc73a769dae68c Paolo Bonzini 2022-09-29 531 kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 532
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 533 /* A 32-bit code segment is required to clear EFER.LMA. */
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 534 memset(&cs_desc, 0, sizeof(cs_desc));
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 535 cs_desc.type = 0xb;
cc73a769dae68c Paolo Bonzini 2022-09-29 536 cs_desc.s = cs_desc.g = cs_desc.present = 1;
cc73a769dae68c Paolo Bonzini 2022-09-29 537 kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 538 }
cc73a769dae68c Paolo Bonzini 2022-09-29 539 #endif
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 540
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 541 /* For the 64-bit case, this will clear EFER.LMA. */
cc73a769dae68c Paolo Bonzini 2022-09-29 542 cr0 = kvm_read_cr0(vcpu);
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 543 if (cr0 & X86_CR0_PE)
cc73a769dae68c Paolo Bonzini 2022-09-29 544 kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
cc6eb1e4699ff9 Paolo Bonzini 2022-09-29 545

--
0-DAY CI Kernel Test Service
https://01.org/lkp



2022-10-14 19:52:10

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH v2 1/8] KVM: x86: start moving SMM-related functions to new files

On Thu, Sep 29, 2022, Paolo Bonzini wrote:
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index a5ac4a5a5179..cb7e68c93e1a 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -7,7 +7,6 @@
> #include <linux/kvm_host.h>
>
> #include "hyperv.h"
> -#include "kvm_cache_regs.h"
>
> #define KVM_APIC_INIT 0
> #define KVM_APIC_SIPI 1
> @@ -229,11 +228,7 @@ static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu)
> return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events;
> }
>
> -static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
> -{
> - return !is_smm(vcpu) &&
> - !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
> -}
> +bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu);

I don't see any reason to force this to go in lapic.c, just include smm.h in
lapic.h, it's not an unreasonable dependency. That way this ends up being a
direct call when SMM is compiled out.
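
Concretely, something like this sketch (assuming smm.h keeps providing an
is_smm() stub that returns false when CONFIG_KVM_SMM=n, as the later patches
in this series do):

	/* lapic.h */
	#include "smm.h"

	static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
	{
		/* is_smm() is constant false when CONFIG_KVM_SMM=n. */
		return !is_smm(vcpu) &&
		       !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
	}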

> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> new file mode 100644
> index 000000000000..d85d4ccd32dd
> --- /dev/null
> +++ b/arch/x86/kvm/smm.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef ASM_KVM_SMM_H
> +#define ASM_KVM_SMM_H

#include <linux/kvm_host.h>

For HF_SMM_MASK, KVM_REQ_SMI, struct kvm_vcpu, etc... Relying on the caller to
include that might bite someone in the future.
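
I.e. something like this at the top of the new header (sketch only, exact
placement is a guess):

	/* SPDX-License-Identifier: GPL-2.0 */
	#ifndef ASM_KVM_SMM_H
	#define ASM_KVM_SMM_H

	#include <linux/kvm_host.h>	/* struct kvm_vcpu, KVM_REQ_SMI, HF_SMM_MASK */

	#define GET_SMSTATE(type, buf, offset) \
		(*(type *)((buf) + (offset) - 0x7e00))
	...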

2022-10-14 21:42:09

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] KVM: x86: do not go through ctxt->ops when emulating rsm

On Thu, Sep 29, 2022, Paolo Bonzini wrote:
> @@ -520,14 +505,14 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> u64 smbase;
> int ret;
>
> - smbase = ctxt->ops->get_smbase(ctxt);
> + smbase = vcpu->arch.smbase;
>
> - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
> - if (ret != X86EMUL_CONTINUE)
> + ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
> + if (ret < 0)
> return X86EMUL_UNHANDLEABLE;
>
> - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
> - ctxt->ops->set_nmi_mask(ctxt, false);
> + if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
> + static_call(kvm_x86_set_nmi_mask)(vcpu, false);
>
> kvm_smm_changed(vcpu, false);
>
> @@ -535,41 +520,41 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> * Get back to real mode, to prepare a safe state in which to load
> * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
> * supports long mode.
> - *
> - * The ctxt->ops callbacks will handle all side effects when writing
> - * writing MSRs and CRs, e.g. MMU context resets, CPUID
> - * runtime updates, etc.
> */
> - if (emulator_has_longmode(ctxt)) {
> - struct desc_struct cs_desc;
> +#ifdef CONFIG_X86_64
> + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {

To fix the "unused 'efer'" issue and avoid multiple guest_cpuid_has() calls, this
as fixup? It's not like we care about the code footprint for 32-bit KVM if the
compiler isn't clever enough to optimize away the dead code.

---
arch/x86/kvm/smm.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 41ca128478fc..740fca1cf3a3 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -500,6 +500,8 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
+ bool is_64bit_vcpu = IS_ENABLED(CONFIG_X86_64) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_LM);
unsigned long cr0, cr4, efer;
char buf[512];
u64 smbase;
@@ -521,8 +523,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
*/
-#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ if (is_64bit_vcpu) {
struct kvm_segment cs_desc;

/* Zero CR4.PCIDE before CR0.PG. */
@@ -536,15 +537,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
cs_desc.s = cs_desc.g = cs_desc.present = 1;
kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
}
-#endif

/* For the 64-bit case, this will clear EFER.LMA. */
cr0 = kvm_read_cr0(vcpu);
if (cr0 & X86_CR0_PE)
kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

-#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ if (is_64bit_vcpu) {
/* Clear CR4.PAE before clearing EFER.LME. */
cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PAE)
@@ -554,7 +553,6 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
efer = 0;
kvm_set_msr(vcpu, MSR_EFER, efer);
}
-#endif

/*
* Give leave_smm() a chance to make ISA-specific changes to the vCPU
@@ -565,7 +563,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (is_64bit_vcpu)
return rsm_load_state_64(ctxt, buf);
else
#endif

base-commit: 8b86d27cc60a150252b04989de818ad4ec85f899
--

2022-10-14 22:06:48

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH v2 8/8] KVM: x86: do not define KVM_REQ_SMI if SMM disabled

On Thu, Sep 29, 2022, Paolo Bonzini wrote:
> This ensures that all the relevant code is compiled out, in fact
> the process_smi stub can be removed too.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/smm.h | 1 -
> arch/x86/kvm/x86.c | 6 ++++++
> 3 files changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d11697504471..d58d4a62b227 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -81,7 +81,9 @@
> #define KVM_REQ_NMI KVM_ARCH_REQ(9)
> #define KVM_REQ_PMU KVM_ARCH_REQ(10)
> #define KVM_REQ_PMI KVM_ARCH_REQ(11)
> +#ifdef CONFIG_KVM_SMM
> #define KVM_REQ_SMI KVM_ARCH_REQ(12)
> +#endif
> #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
> #define KVM_REQ_MCLOCK_INPROGRESS \
> KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> index 7ccce6b655ca..a6795b93ba30 100644
> --- a/arch/x86/kvm/smm.h
> +++ b/arch/x86/kvm/smm.h
> @@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
> static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
> static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
> static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
> -static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }

I think it's worth adding one more patch to kill off kvm_smm_changed() too. Most
of the affected code already has references to CONFIG_KVM_SMM nearby.

---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/smm.c | 4 ++++
arch/x86/kvm/smm.h | 2 --
arch/x86/kvm/x86.c | 18 +++++++++---------
4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b0a82c0bb5c..6c572cf1cf8d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1994,10 +1994,11 @@ enum {
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
+
+#ifdef CONFIG_KVM_SMM
#define HF_SMM_MASK (1 << 6)
#define HF_SMM_INSIDE_NMI_MASK (1 << 7)

-#ifdef CONFIG_KVM_SMM
# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
# define KVM_ADDRESS_SPACE_NUM 2
# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 740fca1cf3a3..12480446c43b 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -10,6 +10,10 @@

void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
+ BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
+ BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
+ BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+
trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

if (entering_smm) {
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 131fbe1817d5..9935045fcf20 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -29,8 +29,6 @@ void process_smi(struct kvm_vcpu *vcpu);
#else
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
-static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
-
/*
* emulator_leave_smm is used as a function pointer, so the
* stub is defined in x86.c.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 56004890a717..ec74d579ca1b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5103,10 +5103,12 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,

events->sipi_vector = 0; /* never valid when reporting to user space */

+#ifdef CONFIG_KVM_SMM
events->smi.smm = is_smm(vcpu);
events->smi.pending = vcpu->arch.smi_pending;
events->smi.smm_inside_nmi =
!!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
+#endif
events->smi.latched_init = kvm_lapic_latched_init(vcpu);

events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
@@ -5194,12 +5196,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;

if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- if (!IS_ENABLED(CONFIG_KVM_SMM) &&
- (events->smi.smm ||
- events->smi.pending ||
- events->smi.smm_inside_nmi))
- return -EINVAL;
-
+#ifdef CONFIG_KVM_SMM
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
@@ -5214,6 +5211,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
}

+#else
+ if (events->smi.smm || events->smi.pending ||
+ events->smi.smm_inside_nmi)
+ return -EINVAL;
+#endif
+
if (lapic_in_kernel(vcpu)) {
if (events->smi.latched_init)
set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
@@ -8228,9 +8231,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
(cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
cs_db ? X86EMUL_MODE_PROT32 :
X86EMUL_MODE_PROT16;
- BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
- BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
- BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);

ctxt->interruptibility = 0;
ctxt->have_exception = false;

base-commit: f7641bcac507589d34b20d30cceb7067f8bcfd08
--

2022-10-24 14:14:48

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 1/8] KVM: x86: start moving SMM-related functions to new files

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> Create a new header and source with code related to system management
> mode emulation. Entry and exit will move there too; for now,
> opportunistically rename put_smstate to PUT_SMSTATE while moving
> it to smm.h, and adjust the SMM state saving code.

I am going to remove put_smstate/get_smstate/etc., so it's probably not
worth doing.

>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 6 --
> arch/x86/kvm/Makefile | 1 +
> arch/x86/kvm/emulate.c | 1 +
> arch/x86/kvm/kvm_cache_regs.h | 5 --
> arch/x86/kvm/lapic.c | 14 ++-
> arch/x86/kvm/lapic.h | 7 +-
> arch/x86/kvm/mmu/mmu.c | 1 +
> arch/x86/kvm/smm.c | 37 ++++++++
> arch/x86/kvm/smm.h | 25 ++++++
> arch/x86/kvm/svm/nested.c | 1 +
> arch/x86/kvm/svm/svm.c | 5 +-
> arch/x86/kvm/vmx/nested.c | 1 +
> arch/x86/kvm/vmx/vmx.c | 1 +
> arch/x86/kvm/x86.c | 148 ++++++++++++--------------------
> 14 files changed, 138 insertions(+), 115 deletions(-)
> create mode 100644 arch/x86/kvm/smm.c
> create mode 100644 arch/x86/kvm/smm.h
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2e325944872c..0ca8f28854ab 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2083,12 +2083,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
> #endif
> }
>
> -#define put_smstate(type, buf, offset, val) \
> - *(type *)((buf) + (offset) - 0x7e00) = val
> -
> -#define GET_SMSTATE(type, buf, offset) \
> - (*(type *)((buf) + (offset) - 0x7e00))
> -
> int kvm_cpu_dirty_log_size(void);
>
> int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 30f244b64523..ec6f7656254b 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -20,6 +20,7 @@ endif
>
> kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
> kvm-$(CONFIG_KVM_XEN) += xen.o
> +kvm-y += smm.o
>
> kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
> vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index b6180032dfd6..5208a13e40e0 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -30,6 +30,7 @@
> #include "tss.h"
> #include "mmu.h"
> #include "pmu.h"
> +#include "smm.h"
>
> /*
> * Operand types
> diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
> index 3febc342360c..c09174f73a34 100644
> --- a/arch/x86/kvm/kvm_cache_regs.h
> +++ b/arch/x86/kvm/kvm_cache_regs.h
> @@ -200,9 +200,4 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
> return vcpu->arch.hflags & HF_GUEST_MASK;
> }
>
> -static inline bool is_smm(struct kvm_vcpu *vcpu)
> -{
> - return vcpu->arch.hflags & HF_SMM_MASK;
> -}
> -
> #endif
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index d7639d126e6c..e636d8c681f4 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -42,6 +42,7 @@
> #include "x86.h"
> #include "cpuid.h"
> #include "hyperv.h"
> +#include "smm.h"
>
> #ifndef CONFIG_X86_64
> #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
> @@ -1170,9 +1171,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
> break;
>
> case APIC_DM_SMI:
> - result = 1;
> - kvm_make_request(KVM_REQ_SMI, vcpu);
> - kvm_vcpu_kick(vcpu);
> + if (!kvm_inject_smi(vcpu)) {
> + kvm_vcpu_kick(vcpu);
> + result = 1;
> + }
> break;
>
> case APIC_DM_NMI:
> @@ -3020,6 +3022,12 @@ int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
> return 0;
> }
>
> +bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
> +{
> + return !is_smm(vcpu) &&
> + !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
> +}
> +
> int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
> {
> struct kvm_lapic *apic = vcpu->arch.apic;
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index a5ac4a5a5179..cb7e68c93e1a 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -7,7 +7,6 @@
> #include <linux/kvm_host.h>
>
> #include "hyperv.h"
> -#include "kvm_cache_regs.h"
>
> #define KVM_APIC_INIT 0
> #define KVM_APIC_SIPI 1
> @@ -229,11 +228,7 @@ static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu)
> return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events;
> }
>
> -static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
> -{
> - return !is_smm(vcpu) &&
> - !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
> -}
> +bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu);
>
> static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
> {
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 40feb5ec761e..04927a49fb69 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -22,6 +22,7 @@
> #include "tdp_mmu.h"
> #include "x86.h"
> #include "kvm_cache_regs.h"
> +#include "smm.h"
> #include "kvm_emulate.h"
> #include "cpuid.h"
> #include "spte.h"
> diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
> new file mode 100644
> index 000000000000..b91c48d91f6e
> --- /dev/null
> +++ b/arch/x86/kvm/smm.c
> @@ -0,0 +1,37 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/kvm_host.h>
> +#include "x86.h"
> +#include "kvm_cache_regs.h"
> +#include "kvm_emulate.h"
> +#include "smm.h"
> +#include "trace.h"
> +
> +void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
> +{
> + trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
> +
> + if (entering_smm) {
> + vcpu->arch.hflags |= HF_SMM_MASK;
> + } else {
> + vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
> +
> + /* Process a latched INIT or SMI, if any. */
> + kvm_make_request(KVM_REQ_EVENT, vcpu);
> +
> + /*
> + * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
> + * on SMM exit we still need to reload them from
> + * guest memory
> + */
> + vcpu->arch.pdptrs_from_userspace = false;
> + }
> +
> + kvm_mmu_reset_context(vcpu);
> +}
> +
> +void process_smi(struct kvm_vcpu *vcpu)
> +{
> + vcpu->arch.smi_pending = true;
> + kvm_make_request(KVM_REQ_EVENT, vcpu);
> +}
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> new file mode 100644
> index 000000000000..d85d4ccd32dd
> --- /dev/null
> +++ b/arch/x86/kvm/smm.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef ASM_KVM_SMM_H
> +#define ASM_KVM_SMM_H
> +
> +#define GET_SMSTATE(type, buf, offset) \
> + (*(type *)((buf) + (offset) - 0x7e00))
> +
> +#define PUT_SMSTATE(type, buf, offset, val) \
> + *(type *)((buf) + (offset) - 0x7e00) = val
> +
> +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
> +{
> + kvm_make_request(KVM_REQ_SMI, vcpu);
> + return 0;
> +}
> +
> +static inline bool is_smm(struct kvm_vcpu *vcpu)
> +{
> + return vcpu->arch.hflags & HF_SMM_MASK;
> +}
> +
> +void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
> +void process_smi(struct kvm_vcpu *vcpu);
> +
> +#endif
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 4c620999d230..cc0fd75f7cba 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -25,6 +25,7 @@
> #include "trace.h"
> #include "mmu.h"
> #include "x86.h"
> +#include "smm.h"
> #include "cpuid.h"
> #include "lapic.h"
> #include "svm.h"
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 58f0077d9357..496ee7d1ae2f 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -6,6 +6,7 @@
> #include "mmu.h"
> #include "kvm_cache_regs.h"
> #include "x86.h"
> +#include "smm.h"
> #include "cpuid.h"
> #include "pmu.h"
>
> @@ -4442,9 +4443,9 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
> return 0;
>
> /* FED8h - SVM Guest */
> - put_smstate(u64, smstate, 0x7ed8, 1);
> + PUT_SMSTATE(u64, smstate, 0x7ed8, 1);
> /* FEE0h - SVM Guest VMCB Physical Address */
> - put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
> + PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
>
> svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
> svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 8f67a9c4a287..29215925e75b 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -16,6 +16,7 @@
> #include "trace.h"
> #include "vmx.h"
> #include "x86.h"
> +#include "smm.h"
>
> static bool __read_mostly enable_shadow_vmcs = 1;
> module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9dba04b6b019..038809c68006 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -66,6 +66,7 @@
> #include "vmcs12.h"
> #include "vmx.h"
> #include "x86.h"
> +#include "smm.h"
>
> MODULE_AUTHOR("Qumranet");
> MODULE_LICENSE("GPL");
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb9d2c23fb04..e0e461958c81 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -30,6 +30,7 @@
> #include "hyperv.h"
> #include "lapic.h"
> #include "xen.h"
> +#include "smm.h"
>
> #include <linux/clocksource.h>
> #include <linux/interrupt.h>
> @@ -119,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
>
> static void update_cr8_intercept(struct kvm_vcpu *vcpu);
> static void process_nmi(struct kvm_vcpu *vcpu);
> -static void process_smi(struct kvm_vcpu *vcpu);
> static void enter_smm(struct kvm_vcpu *vcpu);
> static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
> static void store_regs(struct kvm_vcpu *vcpu);
> @@ -4883,13 +4883,6 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> -static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
> -{
> - kvm_make_request(KVM_REQ_SMI, vcpu);
> -
> - return 0;
> -}
> -
> static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
> struct kvm_tpr_access_ctl *tac)
> {
> @@ -5112,8 +5105,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
> memset(&events->reserved, 0, sizeof(events->reserved));
> }
>
> -static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
> -
> static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
> struct kvm_vcpu_events *events)
> {
> @@ -5566,7 +5557,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> break;
> }
> case KVM_SMI: {
> - r = kvm_vcpu_ioctl_smi(vcpu);
> + r = kvm_inject_smi(vcpu);
> break;
> }
> case KVM_SET_CPUID: {
> @@ -8514,29 +8505,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
> static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
> static int complete_emulated_pio(struct kvm_vcpu *vcpu);
>
> -static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
> -{
> - trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
> -
> - if (entering_smm) {
> - vcpu->arch.hflags |= HF_SMM_MASK;
> - } else {
> - vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
> -
> - /* Process a latched INIT or SMI, if any. */
> - kvm_make_request(KVM_REQ_EVENT, vcpu);
> -
> - /*
> - * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
> - * on SMM exit we still need to reload them from
> - * guest memory
> - */
> - vcpu->arch.pdptrs_from_userspace = false;
> - }
> -
> - kvm_mmu_reset_context(vcpu);
> -}
> -
> static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
> unsigned long *db)
> {
> @@ -10020,16 +9988,16 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
> int offset;
>
> kvm_get_segment(vcpu, &seg, n);
> - put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
> + PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
>
> if (n < 3)
> offset = 0x7f84 + n * 12;
> else
> offset = 0x7f2c + (n - 3) * 12;
>
> - put_smstate(u32, buf, offset + 8, seg.base);
> - put_smstate(u32, buf, offset + 4, seg.limit);
> - put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
> + PUT_SMSTATE(u32, buf, offset + 8, seg.base);
> + PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
> + PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
> }
>
> #ifdef CONFIG_X86_64
> @@ -10043,10 +10011,10 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
> offset = 0x7e00 + n * 16;
>
> flags = enter_smm_get_segment_flags(&seg) >> 8;
> - put_smstate(u16, buf, offset, seg.selector);
> - put_smstate(u16, buf, offset + 2, flags);
> - put_smstate(u32, buf, offset + 4, seg.limit);
> - put_smstate(u64, buf, offset + 8, seg.base);
> + PUT_SMSTATE(u16, buf, offset, seg.selector);
> + PUT_SMSTATE(u16, buf, offset + 2, flags);
> + PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
> + PUT_SMSTATE(u64, buf, offset + 8, seg.base);
> }
> #endif
>
> @@ -10057,47 +10025,47 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
> unsigned long val;
> int i;
>
> - put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
> - put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
> - put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
> - put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
>
> for (i = 0; i < 8; i++)
> - put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
> + PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
>
> kvm_get_dr(vcpu, 6, &val);
> - put_smstate(u32, buf, 0x7fcc, (u32)val);
> + PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
> kvm_get_dr(vcpu, 7, &val);
> - put_smstate(u32, buf, 0x7fc8, (u32)val);
> + PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
>
> kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> - put_smstate(u32, buf, 0x7fc4, seg.selector);
> - put_smstate(u32, buf, 0x7f64, seg.base);
> - put_smstate(u32, buf, 0x7f60, seg.limit);
> - put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
> + PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
> + PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
> + PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
> + PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
>
> kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> - put_smstate(u32, buf, 0x7fc0, seg.selector);
> - put_smstate(u32, buf, 0x7f80, seg.base);
> - put_smstate(u32, buf, 0x7f7c, seg.limit);
> - put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
> + PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
> + PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
> + PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
> + PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
>
> static_call(kvm_x86_get_gdt)(vcpu, &dt);
> - put_smstate(u32, buf, 0x7f74, dt.address);
> - put_smstate(u32, buf, 0x7f70, dt.size);
> + PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
> + PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
>
> static_call(kvm_x86_get_idt)(vcpu, &dt);
> - put_smstate(u32, buf, 0x7f58, dt.address);
> - put_smstate(u32, buf, 0x7f54, dt.size);
> + PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
> + PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
>
> for (i = 0; i < 6; i++)
> enter_smm_save_seg_32(vcpu, buf, i);
>
> - put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
>
> /* revision id */
> - put_smstate(u32, buf, 0x7efc, 0x00020000);
> - put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
> + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
> + PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
> }
>
> #ifdef CONFIG_X86_64
> @@ -10109,46 +10077,46 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
> int i;
>
> for (i = 0; i < 16; i++)
> - put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
> + PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
>
> - put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
> - put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
> + PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
>
> kvm_get_dr(vcpu, 6, &val);
> - put_smstate(u64, buf, 0x7f68, val);
> + PUT_SMSTATE(u64, buf, 0x7f68, val);
> kvm_get_dr(vcpu, 7, &val);
> - put_smstate(u64, buf, 0x7f60, val);
> + PUT_SMSTATE(u64, buf, 0x7f60, val);
>
> - put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
> - put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
> - put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
> + PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
> + PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
> + PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
>
> - put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
> + PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
>
> /* revision id */
> - put_smstate(u32, buf, 0x7efc, 0x00020064);
> + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
>
> - put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
> + PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
>
> kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> - put_smstate(u16, buf, 0x7e90, seg.selector);
> - put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
> - put_smstate(u32, buf, 0x7e94, seg.limit);
> - put_smstate(u64, buf, 0x7e98, seg.base);
> + PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
> + PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
> + PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
> + PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
>
> static_call(kvm_x86_get_idt)(vcpu, &dt);
> - put_smstate(u32, buf, 0x7e84, dt.size);
> - put_smstate(u64, buf, 0x7e88, dt.address);
> + PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
> + PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
>
> kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> - put_smstate(u16, buf, 0x7e70, seg.selector);
> - put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
> - put_smstate(u32, buf, 0x7e74, seg.limit);
> - put_smstate(u64, buf, 0x7e78, seg.base);
> + PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
> + PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
> + PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
> + PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
>
> static_call(kvm_x86_get_gdt)(vcpu, &dt);
> - put_smstate(u32, buf, 0x7e64, dt.size);
> - put_smstate(u64, buf, 0x7e68, dt.address);
> + PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
> + PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
>
> for (i = 0; i < 6; i++)
> enter_smm_save_seg_64(vcpu, buf, i);
> @@ -10234,12 +10202,6 @@ static void enter_smm(struct kvm_vcpu *vcpu)
> kvm_mmu_reset_context(vcpu);
> }
>
> -static void process_smi(struct kvm_vcpu *vcpu)
> -{
> - vcpu->arch.smi_pending = true;
> - kvm_make_request(KVM_REQ_EVENT, vcpu);
> -}
> -
> void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
> unsigned long *vcpu_bitmap)
> {


Besides the remark about put/get_smmstate:

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky


2022-10-24 14:18:08

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 8/8] KVM: x86: do not define KVM_REQ_SMI if SMM disabled

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> This ensures that all the relevant code is compiled out, in fact
> the process_smi stub can be removed too.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/smm.h | 1 -
> arch/x86/kvm/x86.c | 6 ++++++
> 3 files changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d11697504471..d58d4a62b227 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -81,7 +81,9 @@
> #define KVM_REQ_NMI KVM_ARCH_REQ(9)
> #define KVM_REQ_PMU KVM_ARCH_REQ(10)
> #define KVM_REQ_PMI KVM_ARCH_REQ(11)
> +#ifdef CONFIG_KVM_SMM
> #define KVM_REQ_SMI KVM_ARCH_REQ(12)
> +#endif
> #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
> #define KVM_REQ_MCLOCK_INPROGRESS \
> KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> index 7ccce6b655ca..a6795b93ba30 100644
> --- a/arch/x86/kvm/smm.h
> +++ b/arch/x86/kvm/smm.h
> @@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
> static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
> static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
> static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
> -static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
>
> /*
> * emulator_leave_smm is used as a function pointer, so the
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e22184bad92b..ba5661ee3fd7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5020,8 +5020,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
>
> process_nmi(vcpu);
>
> +#ifdef CONFIG_KVM_SMM
> if (kvm_check_request(KVM_REQ_SMI, vcpu))
> process_smi(vcpu);
> +#endif
>
> /*
> * KVM's ABI only allows for one exception to be migrated. Luckily,
> @@ -10194,8 +10196,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> }
> if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
> record_steal_time(vcpu);
> +#ifdef CONFIG_KVM_SMM
> if (kvm_check_request(KVM_REQ_SMI, vcpu))
> process_smi(vcpu);
> +#endif
> if (kvm_check_request(KVM_REQ_NMI, vcpu))
> process_nmi(vcpu);
> if (kvm_check_request(KVM_REQ_PMU, vcpu))
> @@ -12539,7 +12543,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
> return true;
>
> if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
> +#ifdef CONFIG_KVM_SMM
> kvm_test_request(KVM_REQ_SMI, vcpu) ||
> +#endif
> kvm_test_request(KVM_REQ_EVENT, vcpu))
> return true;
>

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky


2022-10-24 18:33:14

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 3/8] KVM: x86: move SMM exit to a new file

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> Some users of KVM implement the UEFI variable store through a paravirtual
> device that does not require the "SMM lockbox" component of edk2, and
> would like to compile out system management mode. In preparation for
> that, move the SMM exit code out of emulate.c and into a new file.
>
> The code is still written as a series of invocations of the emulator
> callbacks, but the two exiting_smm and leave_smm callbacks are merged
> into one, and all the code from em_rsm is now part of the callback.
> This removes all knowledge of the format of the SMM save state area
> from the emulator. Further patches will clean up the code and
> invoke KVM's own functions to access control registers, descriptor
> caches, etc.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/emulate.c | 356 +------------------------------------
> arch/x86/kvm/kvm_emulate.h | 34 +++-
> arch/x86/kvm/smm.c | 316 ++++++++++++++++++++++++++++++++
> arch/x86/kvm/smm.h | 1 +
> arch/x86/kvm/x86.c | 14 --
> 5 files changed, 351 insertions(+), 370 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 5208a13e40e0..af3f70b52f85 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -30,7 +30,6 @@
> #include "tss.h"
> #include "mmu.h"
> #include "pmu.h"
> -#include "smm.h"
>
> /*
> * Operand types
> @@ -243,37 +242,6 @@ enum x86_transfer_type {
> X86_TRANSFER_TASK_SWITCH,
> };
>
> -static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
> -{
> - if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
> - nr &= NR_EMULATOR_GPRS - 1;
> -
> - if (!(ctxt->regs_valid & (1 << nr))) {
> - ctxt->regs_valid |= 1 << nr;
> - ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
> - }
> - return ctxt->_regs[nr];
> -}
> -
> -static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
> -{
> - if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
> - nr &= NR_EMULATOR_GPRS - 1;
> -
> - BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
> - BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
> -
> - ctxt->regs_valid |= 1 << nr;
> - ctxt->regs_dirty |= 1 << nr;
> - return &ctxt->_regs[nr];
> -}
> -
> -static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
> -{
> - reg_read(ctxt, nr);
> - return reg_write(ctxt, nr);
> -}
> -
> static void writeback_registers(struct x86_emulate_ctxt *ctxt)
> {
> unsigned long dirty = ctxt->regs_dirty;
> @@ -2310,334 +2278,14 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
> return rc;
> }
>
> -static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
> -{
> -#ifdef CONFIG_X86_64
> - return ctxt->ops->guest_has_long_mode(ctxt);
> -#else
> - return false;
> -#endif
> -}
> -
> -static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
> -{
> - desc->g = (flags >> 23) & 1;
> - desc->d = (flags >> 22) & 1;
> - desc->l = (flags >> 21) & 1;
> - desc->avl = (flags >> 20) & 1;
> - desc->p = (flags >> 15) & 1;
> - desc->dpl = (flags >> 13) & 3;
> - desc->s = (flags >> 12) & 1;
> - desc->type = (flags >> 8) & 15;
> -}
> -
> -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
> - int n)
> -{
> - struct desc_struct desc;
> - int offset;
> - u16 selector;
> -
> - selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
> -
> - if (n < 3)
> - offset = 0x7f84 + n * 12;
> - else
> - offset = 0x7f2c + (n - 3) * 12;
> -
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
> - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
> - ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
> - return X86EMUL_CONTINUE;
> -}
> -
> -#ifdef CONFIG_X86_64
> -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
> - int n)
> -{
> - struct desc_struct desc;
> - int offset;
> - u16 selector;
> - u32 base3;
> -
> - offset = 0x7e00 + n * 16;
> -
> - selector = GET_SMSTATE(u16, smstate, offset);
> - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
> - base3 = GET_SMSTATE(u32, smstate, offset + 12);
> -
> - ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
> - return X86EMUL_CONTINUE;
> -}
> -#endif
> -
> -static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> - u64 cr0, u64 cr3, u64 cr4)
> -{
> - int bad;
> - u64 pcid;
> -
> - /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
> - pcid = 0;
> - if (cr4 & X86_CR4_PCIDE) {
> - pcid = cr3 & 0xfff;
> - cr3 &= ~0xfff;
> - }
> -
> - bad = ctxt->ops->set_cr(ctxt, 3, cr3);
> - if (bad)
> - return X86EMUL_UNHANDLEABLE;
> -
> - /*
> - * First enable PAE, long mode needs it before CR0.PG = 1 is set.
> - * Then enable protected mode. However, PCID cannot be enabled
> - * if EFER.LMA=0, so set it separately.
> - */
> - bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> - if (bad)
> - return X86EMUL_UNHANDLEABLE;
> -
> - bad = ctxt->ops->set_cr(ctxt, 0, cr0);
> - if (bad)
> - return X86EMUL_UNHANDLEABLE;
> -
> - if (cr4 & X86_CR4_PCIDE) {
> - bad = ctxt->ops->set_cr(ctxt, 4, cr4);
> - if (bad)
> - return X86EMUL_UNHANDLEABLE;
> - if (pcid) {
> - bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
> - if (bad)
> - return X86EMUL_UNHANDLEABLE;
> - }
> -
> - }
> -
> - return X86EMUL_CONTINUE;
> -}
> -
> -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
> - const char *smstate)
> -{
> - struct desc_struct desc;
> - struct desc_ptr dt;
> - u16 selector;
> - u32 val, cr0, cr3, cr4;
> - int i;
> -
> - cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
> - cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
> - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
> - ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
> -
> - for (i = 0; i < NR_EMULATOR_GPRS; i++)
> - *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
> -
> - val = GET_SMSTATE(u32, smstate, 0x7fcc);
> -
> - if (ctxt->ops->set_dr(ctxt, 6, val))
> - return X86EMUL_UNHANDLEABLE;
> -
> - val = GET_SMSTATE(u32, smstate, 0x7fc8);
> -
> - if (ctxt->ops->set_dr(ctxt, 7, val))
> - return X86EMUL_UNHANDLEABLE;
> -
> - selector = GET_SMSTATE(u32, smstate, 0x7fc4);
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
> - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
> - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
> -
> - selector = GET_SMSTATE(u32, smstate, 0x7fc0);
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
> - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
> - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
> -
> - dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
> - dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
> - ctxt->ops->set_gdt(ctxt, &dt);
> -
> - dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
> - dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
> - ctxt->ops->set_idt(ctxt, &dt);
> -
> - for (i = 0; i < 6; i++) {
> - int r = rsm_load_seg_32(ctxt, smstate, i);
> - if (r != X86EMUL_CONTINUE)
> - return r;
> - }
> -
> - cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
> -
> - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
> -
> - return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
> -}
> -
> -#ifdef CONFIG_X86_64
> -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
> - const char *smstate)
> -{
> - struct desc_struct desc;
> - struct desc_ptr dt;
> - u64 val, cr0, cr3, cr4;
> - u32 base3;
> - u16 selector;
> - int i, r;
> -
> - for (i = 0; i < NR_EMULATOR_GPRS; i++)
> - *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
> -
> - ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
> - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
> -
> - val = GET_SMSTATE(u64, smstate, 0x7f68);
> -
> - if (ctxt->ops->set_dr(ctxt, 6, val))
> - return X86EMUL_UNHANDLEABLE;
> -
> - val = GET_SMSTATE(u64, smstate, 0x7f60);
> -
> - if (ctxt->ops->set_dr(ctxt, 7, val))
> - return X86EMUL_UNHANDLEABLE;
> -
> - cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
> - cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
> - cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
> - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
> - val = GET_SMSTATE(u64, smstate, 0x7ed0);
> -
> - if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
> - return X86EMUL_UNHANDLEABLE;
> -
> - selector = GET_SMSTATE(u32, smstate, 0x7e90);
> - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
> - base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
> - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
> -
> - dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
> - dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
> - ctxt->ops->set_idt(ctxt, &dt);
> -
> - selector = GET_SMSTATE(u32, smstate, 0x7e70);
> - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
> - base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
> - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
> -
> - dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
> - dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
> - ctxt->ops->set_gdt(ctxt, &dt);
> -
> - r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
> - if (r != X86EMUL_CONTINUE)
> - return r;
> -
> - for (i = 0; i < 6; i++) {
> - r = rsm_load_seg_64(ctxt, smstate, i);
> - if (r != X86EMUL_CONTINUE)
> - return r;
> - }
> -
> - return X86EMUL_CONTINUE;
> -}
> -#endif
> -
> static int em_rsm(struct x86_emulate_ctxt *ctxt)
> {
> - unsigned long cr0, cr4, efer;
> - char buf[512];
> - u64 smbase;
> - int ret;
> -
> if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
> return emulate_ud(ctxt);
>
> - smbase = ctxt->ops->get_smbase(ctxt);
> -
> - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
> - if (ret != X86EMUL_CONTINUE)
> - return X86EMUL_UNHANDLEABLE;
> -
> - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
> - ctxt->ops->set_nmi_mask(ctxt, false);
> -
> - ctxt->ops->exiting_smm(ctxt);
> -
> - /*
> - * Get back to real mode, to prepare a safe state in which to load
> - * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
> - * supports long mode.
> - */
> - if (emulator_has_longmode(ctxt)) {
> - struct desc_struct cs_desc;
> -
> - /* Zero CR4.PCIDE before CR0.PG. */
> - cr4 = ctxt->ops->get_cr(ctxt, 4);
> - if (cr4 & X86_CR4_PCIDE)
> - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> -
> - /* A 32-bit code segment is required to clear EFER.LMA. */
> - memset(&cs_desc, 0, sizeof(cs_desc));
> - cs_desc.type = 0xb;
> - cs_desc.s = cs_desc.g = cs_desc.p = 1;
> - ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
> - }
> -
> - /* For the 64-bit case, this will clear EFER.LMA. */
> - cr0 = ctxt->ops->get_cr(ctxt, 0);
> - if (cr0 & X86_CR0_PE)
> - ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> -
> - if (emulator_has_longmode(ctxt)) {
> - /* Clear CR4.PAE before clearing EFER.LME. */
> - cr4 = ctxt->ops->get_cr(ctxt, 4);
> - if (cr4 & X86_CR4_PAE)
> - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> -
> - /* And finally go back to 32-bit mode. */
> - efer = 0;
> - ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
> - }
> -
> - /*
> - * Give leave_smm() a chance to make ISA-specific changes to the vCPU
> - * state (e.g. enter guest mode) before loading state from the SMM
> - * state-save area.
> - */
> - if (ctxt->ops->leave_smm(ctxt, buf))
> - goto emulate_shutdown;
> -
> -#ifdef CONFIG_X86_64
> - if (emulator_has_longmode(ctxt))
> - ret = rsm_load_state_64(ctxt, buf);
> - else
> -#endif
> - ret = rsm_load_state_32(ctxt, buf);
> -
> - if (ret != X86EMUL_CONTINUE)
> - goto emulate_shutdown;
> -
> - /*
> - * Note, the ctxt->ops callbacks are responsible for handling side
> - * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
> - * runtime updates, etc... If that changes, e.g. this flow is moved
> - * out of the emulator to make it look more like enter_smm(), then
> - * those side effects need to be explicitly handled for both success
> - * and shutdown.
> - */
> - return X86EMUL_CONTINUE;
> + if (ctxt->ops->leave_smm(ctxt))
> + ctxt->ops->triple_fault(ctxt);
>
> -emulate_shutdown:
> - ctxt->ops->triple_fault(ctxt);
> return X86EMUL_CONTINUE;
> }
>
> diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
> index 89246446d6aa..d7afbc448dd2 100644
> --- a/arch/x86/kvm/kvm_emulate.h
> +++ b/arch/x86/kvm/kvm_emulate.h
> @@ -234,8 +234,7 @@ struct x86_emulate_ops {
> void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
>
> unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
> - void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
> - int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
> + int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
> void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
> int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
> };
> @@ -526,4 +525,35 @@ void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
> void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
> bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt);
>
> +static inline ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
> +{
> + if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
> + nr &= NR_EMULATOR_GPRS - 1;
> +
> + if (!(ctxt->regs_valid & (1 << nr))) {
> + ctxt->regs_valid |= 1 << nr;
> + ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
> + }
> + return ctxt->_regs[nr];
> +}
> +
> +static inline ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
> +{
> + if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
> + nr &= NR_EMULATOR_GPRS - 1;
> +
> + BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
> + BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
> +
> + ctxt->regs_valid |= 1 << nr;
> + ctxt->regs_dirty |= 1 << nr;
> + return &ctxt->_regs[nr];
> +}
> +
> +static inline ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
> +{
> + reg_read(ctxt, nr);
> + return reg_write(ctxt, nr);
> +}
> +
> #endif /* _ASM_X86_KVM_X86_EMULATE_H */
> diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
> index 26a6859e421f..773e07b6397d 100644
> --- a/arch/x86/kvm/smm.c
> +++ b/arch/x86/kvm/smm.c
> @@ -270,3 +270,319 @@ void enter_smm(struct kvm_vcpu *vcpu)
> kvm_update_cpuid_runtime(vcpu);
> kvm_mmu_reset_context(vcpu);
> }
> +
> +static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
> +{
> +#ifdef CONFIG_X86_64
> + return ctxt->ops->guest_has_long_mode(ctxt);
> +#else
> + return false;
> +#endif
> +}
> +
> +static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
> +{
> + desc->g = (flags >> 23) & 1;
> + desc->d = (flags >> 22) & 1;
> + desc->l = (flags >> 21) & 1;
> + desc->avl = (flags >> 20) & 1;
> + desc->p = (flags >> 15) & 1;
> + desc->dpl = (flags >> 13) & 3;
> + desc->s = (flags >> 12) & 1;
> + desc->type = (flags >> 8) & 15;
> +}
> +
> +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
> + int n)
> +{
> + struct desc_struct desc;
> + int offset;
> + u16 selector;
> +
> + selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
> +
> + if (n < 3)
> + offset = 0x7f84 + n * 12;
> + else
> + offset = 0x7f2c + (n - 3) * 12;
> +
> + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
> + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
> + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
> + ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
> + return X86EMUL_CONTINUE;
> +}
> +
> +#ifdef CONFIG_X86_64
> +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
> + int n)
> +{
> + struct desc_struct desc;
> + int offset;
> + u16 selector;
> + u32 base3;
> +
> + offset = 0x7e00 + n * 16;
> +
> + selector = GET_SMSTATE(u16, smstate, offset);
> + rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
> + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
> + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
> + base3 = GET_SMSTATE(u32, smstate, offset + 12);
> +
> + ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
> + return X86EMUL_CONTINUE;
> +}
> +#endif
> +
> +static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> + u64 cr0, u64 cr3, u64 cr4)
> +{
> + int bad;
> + u64 pcid;
> +
> + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
> + pcid = 0;
> + if (cr4 & X86_CR4_PCIDE) {
> + pcid = cr3 & 0xfff;
> + cr3 &= ~0xfff;
> + }
> +
> + bad = ctxt->ops->set_cr(ctxt, 3, cr3);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> +
> + /*
> + * First enable PAE, long mode needs it before CR0.PG = 1 is set.
> + * Then enable protected mode. However, PCID cannot be enabled
> + * if EFER.LMA=0, so set it separately.
> + */
> + bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> +
> + bad = ctxt->ops->set_cr(ctxt, 0, cr0);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> +
> + if (cr4 & X86_CR4_PCIDE) {
> + bad = ctxt->ops->set_cr(ctxt, 4, cr4);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> + if (pcid) {
> + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
> + if (bad)
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + }
> +
> + return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
> + const char *smstate)
> +{
> + struct desc_struct desc;
> + struct desc_ptr dt;
> + u16 selector;
> + u32 val, cr0, cr3, cr4;
> + int i;
> +
> + cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
> + cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
> + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
> + ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
> +
> + for (i = 0; i < NR_EMULATOR_GPRS; i++)
> + *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
> +
> + val = GET_SMSTATE(u32, smstate, 0x7fcc);
> +
> + if (ctxt->ops->set_dr(ctxt, 6, val))
> + return X86EMUL_UNHANDLEABLE;
> +
> + val = GET_SMSTATE(u32, smstate, 0x7fc8);
> +
> + if (ctxt->ops->set_dr(ctxt, 7, val))
> + return X86EMUL_UNHANDLEABLE;
> +
> + selector = GET_SMSTATE(u32, smstate, 0x7fc4);
> + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
> + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
> + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
> + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
> +
> + selector = GET_SMSTATE(u32, smstate, 0x7fc0);
> + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
> + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
> + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
> + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
> +
> + dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
> + dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
> + ctxt->ops->set_gdt(ctxt, &dt);
> +
> + dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
> + dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
> + ctxt->ops->set_idt(ctxt, &dt);
> +
> + for (i = 0; i < 6; i++) {
> + int r = rsm_load_seg_32(ctxt, smstate, i);
> + if (r != X86EMUL_CONTINUE)
> + return r;
> + }
> +
> + cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
> +
> + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
> +
> + return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
> +}
> +
> +#ifdef CONFIG_X86_64
> +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
> + const char *smstate)
> +{
> + struct desc_struct desc;
> + struct desc_ptr dt;
> + u64 val, cr0, cr3, cr4;
> + u32 base3;
> + u16 selector;
> + int i, r;
> +
> + for (i = 0; i < NR_EMULATOR_GPRS; i++)
> + *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
> +
> + ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
> + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
> +
> + val = GET_SMSTATE(u64, smstate, 0x7f68);
> +
> + if (ctxt->ops->set_dr(ctxt, 6, val))
> + return X86EMUL_UNHANDLEABLE;
> +
> + val = GET_SMSTATE(u64, smstate, 0x7f60);
> +
> + if (ctxt->ops->set_dr(ctxt, 7, val))
> + return X86EMUL_UNHANDLEABLE;
> +
> + cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
> + cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
> + cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
> + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
> + val = GET_SMSTATE(u64, smstate, 0x7ed0);
> +
> + if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
> + return X86EMUL_UNHANDLEABLE;
> +
> + selector = GET_SMSTATE(u32, smstate, 0x7e90);
> + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
> + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
> + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
> + base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
> + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
> +
> + dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
> + dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
> + ctxt->ops->set_idt(ctxt, &dt);
> +
> + selector = GET_SMSTATE(u32, smstate, 0x7e70);
> + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
> + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
> + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
> + base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
> + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
> +
> + dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
> + dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
> + ctxt->ops->set_gdt(ctxt, &dt);
> +
> + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
> + if (r != X86EMUL_CONTINUE)
> + return r;
> +
> + for (i = 0; i < 6; i++) {
> + r = rsm_load_seg_64(ctxt, smstate, i);
> + if (r != X86EMUL_CONTINUE)
> + return r;
> + }
> +
> + return X86EMUL_CONTINUE;
> +}
> +#endif
> +
> +int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> +{
> + struct kvm_vcpu *vcpu = ctxt->vcpu;
> + unsigned long cr0, cr4, efer;
> + char buf[512];
> + u64 smbase;
> + int ret;
> +
> + smbase = ctxt->ops->get_smbase(ctxt);
> +
> + ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
> + if (ret != X86EMUL_CONTINUE)
> + return X86EMUL_UNHANDLEABLE;
> +
> + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
> + ctxt->ops->set_nmi_mask(ctxt, false);
> +
> + kvm_smm_changed(vcpu, false);
> +
> + /*
> + * Get back to real mode, to prepare a safe state in which to load
> + * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
> + * supports long mode.
> + *
> + * The ctxt->ops callbacks will handle all side effects when writing
> + * writing MSRs and CRs, e.g. MMU context resets, CPUID
> + * runtime updates, etc.
> + */
> + if (emulator_has_longmode(ctxt)) {
> + struct desc_struct cs_desc;
> +
> + /* Zero CR4.PCIDE before CR0.PG. */
> + cr4 = ctxt->ops->get_cr(ctxt, 4);
> + if (cr4 & X86_CR4_PCIDE)
> + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> +
> + /* A 32-bit code segment is required to clear EFER.LMA. */
> + memset(&cs_desc, 0, sizeof(cs_desc));
> + cs_desc.type = 0xb;
> + cs_desc.s = cs_desc.g = cs_desc.p = 1;
> + ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
> + }
> +
> + /* For the 64-bit case, this will clear EFER.LMA. */
> + cr0 = ctxt->ops->get_cr(ctxt, 0);
> + if (cr0 & X86_CR0_PE)
> + ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> +
> + if (emulator_has_longmode(ctxt)) {
> + /* Clear CR4.PAE before clearing EFER.LME. */
> + cr4 = ctxt->ops->get_cr(ctxt, 4);
> + if (cr4 & X86_CR4_PAE)
> + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> +
> + /* And finally go back to 32-bit mode. */
> + efer = 0;
> + ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
> + }
> +
> + /*
> + * Give leave_smm() a chance to make ISA-specific changes to the vCPU
> + * state (e.g. enter guest mode) before loading state from the SMM
> + * state-save area.
> + */
> + if (static_call(kvm_x86_leave_smm)(vcpu, buf))
> + return X86EMUL_UNHANDLEABLE;
> +
> +#ifdef CONFIG_X86_64
> + if (emulator_has_longmode(ctxt))
> + return rsm_load_state_64(ctxt, buf);
> + else
> +#endif
> + return rsm_load_state_32(ctxt, buf);
> +}
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> index aacc6dac2c99..b0602a92e511 100644
> --- a/arch/x86/kvm/smm.h
> +++ b/arch/x86/kvm/smm.h
> @@ -21,6 +21,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
>
> void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
> void enter_smm(struct kvm_vcpu *vcpu);
> +int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
> void process_smi(struct kvm_vcpu *vcpu);
>
> #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 476b4a6e81ab..97d6ee179109 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8095,19 +8095,6 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
> return emul_to_vcpu(ctxt)->arch.hflags;
> }
>
> -static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
> -{
> - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
> -
> - kvm_smm_changed(vcpu, false);
> -}
> -
> -static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
> - const char *smstate)
> -{
> - return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
> -}
> -
> static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
> {
> kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
> @@ -8171,7 +8158,6 @@ static const struct x86_emulate_ops emulate_ops = {
> .guest_has_rdpid = emulator_guest_has_rdpid,
> .set_nmi_mask = emulator_set_nmi_mask,
> .get_hflags = emulator_get_hflags,
> - .exiting_smm = emulator_exiting_smm,
> .leave_smm = emulator_leave_smm,
> .triple_fault = emulator_triple_fault,
> .set_xcr = emulator_set_xcr,


Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky

2022-10-24 19:27:01

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 6/8] KVM: x86: compile out vendor-specific code if SMM is disabled

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> Vendor-specific code that deals with SMI injection and saving/restoring
> SMM state is not needed if CONFIG_KVM_SMM is disabled, so remove the
> four callbacks smi_allowed, enter_smm, leave_smm and enable_smi_window.
> The users in svm/nested.c and x86.c also have to be compiled out; the
> amount of #ifdef'ed code is small and it's not worth moving it to
> smm.c.
>
> enter_smm is now used only within #ifdef CONFIG_KVM_SMM, and the stub
> can therefore be removed.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm-x86-ops.h | 2 ++
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/smm.h | 1 -
> arch/x86/kvm/svm/nested.c | 2 ++
> arch/x86/kvm/svm/svm.c | 4 ++++
> arch/x86/kvm/vmx/vmx.c | 4 ++++
> arch/x86/kvm/x86.c | 4 ++++
> 7 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 82ba4a564e58..ea58e67e9a67 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
> KVM_X86_OP_OPTIONAL(set_hv_timer)
> KVM_X86_OP_OPTIONAL(cancel_hv_timer)
> KVM_X86_OP(setup_mce)
> +#ifdef CONFIG_KVM_SMM
> KVM_X86_OP(smi_allowed)
> KVM_X86_OP(enter_smm)
> KVM_X86_OP(leave_smm)
> KVM_X86_OP(enable_smi_window)
> +#endif
> KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
> KVM_X86_OP_OPTIONAL(mem_enc_register_region)
> KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b7d078cd768d..cb88da02d965 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1606,10 +1606,12 @@ struct kvm_x86_ops {
>
> void (*setup_mce)(struct kvm_vcpu *vcpu);
>
> +#ifdef CONFIG_KVM_SMM
> int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
> int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
> int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
> void (*enable_smi_window)(struct kvm_vcpu *vcpu);
> +#endif
>
> int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
> int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> index 4c699fee4492..7ccce6b655ca 100644
> --- a/arch/x86/kvm/smm.h
> +++ b/arch/x86/kvm/smm.h
> @@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
> static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
> static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
> static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
> -static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
> static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
>
> /*
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index cc0fd75f7cba..b258d6988f5d 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -1378,6 +1378,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> +#ifdef CONFIG_KVM_SMM
> if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
> if (block_nested_events)
> return -EBUSY;
> @@ -1386,6 +1387,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
> nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
> return 0;
> }
> +#endif
>
> if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
> if (block_nested_events)
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 6f7ceb35d2ff..2200b8aa7273 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4408,6 +4408,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
> vcpu->arch.mcg_cap &= 0x1ff;
> }
>
> +#ifdef CONFIG_KVM_SMM
> bool svm_smi_blocked(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4557,6 +4558,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
> /* We must be in SMM; RSM will cause a vmexit anyway. */
> }
> }
> +#endif
>
> static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
> void *insn, int insn_len)
> @@ -4832,10 +4834,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .pi_update_irte = avic_pi_update_irte,
> .setup_mce = svm_setup_mce,
>
> +#ifdef CONFIG_KVM_SMM
> .smi_allowed = svm_smi_allowed,
> .enter_smm = svm_enter_smm,
> .leave_smm = svm_leave_smm,
> .enable_smi_window = svm_enable_smi_window,
> +#endif
>
> .mem_enc_ioctl = sev_mem_enc_ioctl,
> .mem_enc_register_region = sev_mem_enc_register_region,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index b22330a15adb..107fc035c91b 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7905,6 +7905,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
> ~FEAT_CTL_LMCE_ENABLED;
> }
>
> +#ifdef CONFIG_KVM_SMM
> static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
> {
> /* we need a nested vmexit to enter SMM, postpone if run is pending */
> @@ -7959,6 +7960,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
> {
> /* RSM will cause a vmexit anyway. */
> }
> +#endif
>
> static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
> {
> @@ -8126,10 +8128,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
>
> .setup_mce = vmx_setup_mce,
>
> +#ifdef CONFIG_KVM_SMM
> .smi_allowed = vmx_smi_allowed,
> .enter_smm = vmx_enter_smm,
> .leave_smm = vmx_leave_smm,
> .enable_smi_window = vmx_enable_smi_window,
> +#endif
>
> .can_emulate_instruction = vmx_can_emulate_instruction,
> .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a9e050aefea6..e22184bad92b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9863,6 +9863,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
> * in order to make progress and get back here for another iteration.
> * The kvm_x86_ops hooks communicate this by returning -EBUSY.
> */
> +#ifdef CONFIG_KVM_SMM
> if (vcpu->arch.smi_pending) {
> r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
> if (r < 0)
> @@ -9875,6 +9876,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
> } else
> static_call(kvm_x86_enable_smi_window)(vcpu);
> }
> +#endif
>
> if (vcpu->arch.nmi_pending) {
> r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
> @@ -12491,10 +12493,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
> static_call(kvm_x86_nmi_allowed)(vcpu, false)))
> return true;
>
> +#ifdef CONFIG_KVM_SMM
> if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
> (vcpu->arch.smi_pending &&
> static_call(kvm_x86_smi_allowed)(vcpu, false)))
> return true;
> +#endif
>
> if (kvm_arch_interrupt_allowed(vcpu) &&
> (kvm_cpu_has_interrupt(vcpu) ||


Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky
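
As a side note for readers tracking the Kconfig plumbing: a bare #ifdef in
kvm-x86-ops.h and kvm_host.h is sufficient here because the same condition
guards the callback declarations, the vendor implementations, the
svm/vmx_x86_ops initializers and every call site, so nothing is left dangling
when CONFIG_KVM_SMM is off. Below is a minimal, self-contained sketch of that
pattern, not KVM code; demo_ops, CONFIG_DEMO_SMM and the demo_* functions are
invented names for illustration only.

#include <stdio.h>

#define CONFIG_DEMO_SMM 1	/* stand-in for CONFIG_KVM_SMM; flip to 0 to compile the hooks out */

struct demo_ops {
	void (*setup_mce)(void);
#if CONFIG_DEMO_SMM
	int  (*smi_allowed)(int for_injection);
	void (*enable_smi_window)(void);
#endif
};

static void demo_setup_mce(void) { puts("setup_mce"); }

#if CONFIG_DEMO_SMM
static int  demo_smi_allowed(int for_injection) { (void)for_injection; return 1; }
static void demo_enable_smi_window(void)        { puts("enable_smi_window"); }
#endif

static const struct demo_ops ops = {
	.setup_mce = demo_setup_mce,
#if CONFIG_DEMO_SMM
	/* the same guard covers the field, the implementation and the initializer */
	.smi_allowed       = demo_smi_allowed,
	.enable_smi_window = demo_enable_smi_window,
#endif
};

int main(void)
{
	ops.setup_mce();
#if CONFIG_DEMO_SMM
	/* call sites are wrapped in the same condition, mirroring x86.c and nested.c */
	if (ops.smi_allowed(1))
		ops.enable_smi_window();
#endif
	return 0;
}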


2022-10-24 19:42:19

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] KVM: x86: do not go through ctxt->ops when emulating rsm

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> Now that RSM is implemented in a single emulator callback, there is no
> point in going through other callbacks for the sake of modifying
> processor state. Just invoke KVM's own internal functions directly,
> and remove the callbacks that were only used by em_rsm; the only
> substantial difference is in the handling of the segment registers
> and descriptor cache, which have to be parsed into a struct kvm_segment
> instead of a struct desc_struct.
>
> This also fixes a bug where emulator_set_segment was shifting the
> limit left by 12 if the G bit is set, but the limit had not been
> shifted right upon entry to SMM.
>
> The emulator context is still used to restore EIP and the general
> purpose registers.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/kvm_emulate.h | 13 ---
> arch/x86/kvm/smm.c | 177 +++++++++++++++++--------------------
> arch/x86/kvm/x86.c | 33 -------
> 3 files changed, 81 insertions(+), 142 deletions(-)
>
> diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
> index d7afbc448dd2..84b1f2661463 100644
> --- a/arch/x86/kvm/kvm_emulate.h
> +++ b/arch/x86/kvm/kvm_emulate.h
> @@ -116,16 +116,6 @@ struct x86_emulate_ops {
> unsigned int bytes,
> struct x86_exception *fault, bool system);
>
> - /*
> - * read_phys: Read bytes of standard (non-emulated/special) memory.
> - * Used for descriptor reading.
> - * @addr: [IN ] Physical address from which to read.
> - * @val: [OUT] Value read from memory.
> - * @bytes: [IN ] Number of bytes to read from memory.
> - */
> - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
> - void *val, unsigned int bytes);
> -
> /*
> * write_std: Write bytes of standard (non-emulated/special) memory.
> * Used for descriptor writing.
> @@ -209,11 +199,8 @@ struct x86_emulate_ops {
> int (*cpl)(struct x86_emulate_ctxt *ctxt);
> void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
> int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
> - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
> - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
> int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
> int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
> - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
> int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
> int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
> int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
> diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
> index 773e07b6397d..41ca128478fc 100644
> --- a/arch/x86/kvm/smm.c
> +++ b/arch/x86/kvm/smm.c
> @@ -271,71 +271,59 @@ void enter_smm(struct kvm_vcpu *vcpu)
> kvm_mmu_reset_context(vcpu);
> }
>
> -static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
> -{
> -#ifdef CONFIG_X86_64
> - return ctxt->ops->guest_has_long_mode(ctxt);
> -#else
> - return false;
> -#endif
> -}
> -
> -static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
> +static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
> {
> desc->g = (flags >> 23) & 1;
> - desc->d = (flags >> 22) & 1;
> + desc->db = (flags >> 22) & 1;
> desc->l = (flags >> 21) & 1;
> desc->avl = (flags >> 20) & 1;
> - desc->p = (flags >> 15) & 1;
> + desc->present = (flags >> 15) & 1;
> desc->dpl = (flags >> 13) & 3;
> desc->s = (flags >> 12) & 1;
> desc->type = (flags >> 8) & 15;
> +
> + desc->unusable = !desc->present;
> + desc->padding = 0;
> }
>
> -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
> +static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
> int n)
> {
> - struct desc_struct desc;
> + struct kvm_segment desc;
> int offset;
> - u16 selector;
> -
> - selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
>
> if (n < 3)
> offset = 0x7f84 + n * 12;
> else
> offset = 0x7f2c + (n - 3) * 12;
>
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
> + desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
> + desc.base = GET_SMSTATE(u32, smstate, offset + 8);
> + desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
> rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
> - ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
> + kvm_set_segment(vcpu, &desc, n);
> return X86EMUL_CONTINUE;
> }
>
> #ifdef CONFIG_X86_64
> -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
> +static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
> int n)
> {
> - struct desc_struct desc;
> + struct kvm_segment desc;
> int offset;
> - u16 selector;
> - u32 base3;
>
> offset = 0x7e00 + n * 16;
>
> - selector = GET_SMSTATE(u16, smstate, offset);
> + desc.selector = GET_SMSTATE(u16, smstate, offset);
> rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
> - base3 = GET_SMSTATE(u32, smstate, offset + 12);
> -
> - ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
> + desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
> + desc.base = GET_SMSTATE(u64, smstate, offset + 8);
> + kvm_set_segment(vcpu, &desc, n);
> return X86EMUL_CONTINUE;
> }
> #endif
>
> -static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> +static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
> u64 cr0, u64 cr3, u64 cr4)
> {
> int bad;
> @@ -348,7 +336,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> cr3 &= ~0xfff;
> }
>
> - bad = ctxt->ops->set_cr(ctxt, 3, cr3);
> + bad = kvm_set_cr3(vcpu, cr3);
> if (bad)
> return X86EMUL_UNHANDLEABLE;
>
> @@ -357,20 +345,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> * Then enable protected mode. However, PCID cannot be enabled
> * if EFER.LMA=0, so set it separately.
> */
> - bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> + bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
> if (bad)
> return X86EMUL_UNHANDLEABLE;
>
> - bad = ctxt->ops->set_cr(ctxt, 0, cr0);
> + bad = kvm_set_cr0(vcpu, cr0);
> if (bad)
> return X86EMUL_UNHANDLEABLE;
>
> if (cr4 & X86_CR4_PCIDE) {
> - bad = ctxt->ops->set_cr(ctxt, 4, cr4);
> + bad = kvm_set_cr4(vcpu, cr4);
> if (bad)
> return X86EMUL_UNHANDLEABLE;
> if (pcid) {
> - bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
> + bad = kvm_set_cr3(vcpu, cr3 | pcid);
> if (bad)
> return X86EMUL_UNHANDLEABLE;
> }
> @@ -383,9 +371,9 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
> const char *smstate)
> {
> - struct desc_struct desc;
> + struct kvm_vcpu *vcpu = ctxt->vcpu;
> + struct kvm_segment desc;
> struct desc_ptr dt;
> - u16 selector;
> u32 val, cr0, cr3, cr4;
> int i;
>
> @@ -399,56 +387,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
>
> val = GET_SMSTATE(u32, smstate, 0x7fcc);
>
> - if (ctxt->ops->set_dr(ctxt, 6, val))
> + if (kvm_set_dr(vcpu, 6, val))
> return X86EMUL_UNHANDLEABLE;
>
> val = GET_SMSTATE(u32, smstate, 0x7fc8);
>
> - if (ctxt->ops->set_dr(ctxt, 7, val))
> + if (kvm_set_dr(vcpu, 7, val))
> return X86EMUL_UNHANDLEABLE;
>
> - selector = GET_SMSTATE(u32, smstate, 0x7fc4);
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
> + desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4);
> + desc.base = GET_SMSTATE(u32, smstate, 0x7f64);
> + desc.limit = GET_SMSTATE(u32, smstate, 0x7f60);
> rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
> - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
> + kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
>
> - selector = GET_SMSTATE(u32, smstate, 0x7fc0);
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
> + desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0);
> + desc.base = GET_SMSTATE(u32, smstate, 0x7f80);
> + desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c);
> rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
> - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
> + kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
>
> dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
> dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
> - ctxt->ops->set_gdt(ctxt, &dt);
> + static_call(kvm_x86_set_gdt)(vcpu, &dt);
>
> dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
> dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
> - ctxt->ops->set_idt(ctxt, &dt);
> + static_call(kvm_x86_set_idt)(vcpu, &dt);
>
> for (i = 0; i < 6; i++) {
> - int r = rsm_load_seg_32(ctxt, smstate, i);
> + int r = rsm_load_seg_32(vcpu, smstate, i);
> if (r != X86EMUL_CONTINUE)
> return r;
> }
>
> cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
>
> - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
> + vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);
>
> - return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
> + return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
> }
>
> #ifdef CONFIG_X86_64
> static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
> const char *smstate)
> {
> - struct desc_struct desc;
> + struct kvm_vcpu *vcpu = ctxt->vcpu;
> + struct kvm_segment desc;
> struct desc_ptr dt;
> u64 val, cr0, cr3, cr4;
> - u32 base3;
> - u16 selector;
> int i, r;
>
> for (i = 0; i < NR_EMULATOR_GPRS; i++)
> @@ -459,51 +446,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
>
> val = GET_SMSTATE(u64, smstate, 0x7f68);
>
> - if (ctxt->ops->set_dr(ctxt, 6, val))
> + if (kvm_set_dr(vcpu, 6, val))
> return X86EMUL_UNHANDLEABLE;
>
> val = GET_SMSTATE(u64, smstate, 0x7f60);
>
> - if (ctxt->ops->set_dr(ctxt, 7, val))
> + if (kvm_set_dr(vcpu, 7, val))
> return X86EMUL_UNHANDLEABLE;
>
> cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
> cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
> cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
> - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
> + vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00);
> val = GET_SMSTATE(u64, smstate, 0x7ed0);
>
> - if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
> + if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
> return X86EMUL_UNHANDLEABLE;
>
> - selector = GET_SMSTATE(u32, smstate, 0x7e90);
> + desc.selector = GET_SMSTATE(u32, smstate, 0x7e90);
> rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
> - base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
> - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
> + desc.limit = GET_SMSTATE(u32, smstate, 0x7e94);
> + desc.base = GET_SMSTATE(u64, smstate, 0x7e98);
> + kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
>
> dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
> dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
> - ctxt->ops->set_idt(ctxt, &dt);
> + static_call(kvm_x86_set_idt)(vcpu, &dt);
>
> - selector = GET_SMSTATE(u32, smstate, 0x7e70);
> + desc.selector = GET_SMSTATE(u32, smstate, 0x7e70);
> rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
> - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
> - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
> - base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
> - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
> + desc.limit = GET_SMSTATE(u32, smstate, 0x7e74);
> + desc.base = GET_SMSTATE(u64, smstate, 0x7e78);
> + kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
>
> dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
> dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
> - ctxt->ops->set_gdt(ctxt, &dt);
> + static_call(kvm_x86_set_gdt)(vcpu, &dt);
>
> - r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
> + r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
> if (r != X86EMUL_CONTINUE)
> return r;
>
> for (i = 0; i < 6; i++) {
> - r = rsm_load_seg_64(ctxt, smstate, i);
> + r = rsm_load_seg_64(vcpu, smstate, i);
> if (r != X86EMUL_CONTINUE)
> return r;
> }
> @@ -520,14 +505,14 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> u64 smbase;
> int ret;
>
> - smbase = ctxt->ops->get_smbase(ctxt);
> + smbase = vcpu->arch.smbase;
>
> - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
> - if (ret != X86EMUL_CONTINUE)
> + ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
> + if (ret < 0)
> return X86EMUL_UNHANDLEABLE;
>
> - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
> - ctxt->ops->set_nmi_mask(ctxt, false);
> + if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
> + static_call(kvm_x86_set_nmi_mask)(vcpu, false);
>
> kvm_smm_changed(vcpu, false);
>
> @@ -535,41 +520,41 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> * Get back to real mode, to prepare a safe state in which to load
> * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
> * supports long mode.
> - *
> - * The ctxt->ops callbacks will handle all side effects when writing
> - * writing MSRs and CRs, e.g. MMU context resets, CPUID
> - * runtime updates, etc.
> */
> - if (emulator_has_longmode(ctxt)) {
> - struct desc_struct cs_desc;
> +#ifdef CONFIG_X86_64
> + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
> + struct kvm_segment cs_desc;
>
> /* Zero CR4.PCIDE before CR0.PG. */
> - cr4 = ctxt->ops->get_cr(ctxt, 4);
> + cr4 = kvm_read_cr4(vcpu);
> if (cr4 & X86_CR4_PCIDE)
> - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> + kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
>
> /* A 32-bit code segment is required to clear EFER.LMA. */
> memset(&cs_desc, 0, sizeof(cs_desc));
> cs_desc.type = 0xb;
> - cs_desc.s = cs_desc.g = cs_desc.p = 1;
> - ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
> + cs_desc.s = cs_desc.g = cs_desc.present = 1;
> + kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
> }
> +#endif
>
> /* For the 64-bit case, this will clear EFER.LMA. */
> - cr0 = ctxt->ops->get_cr(ctxt, 0);
> + cr0 = kvm_read_cr0(vcpu);
> if (cr0 & X86_CR0_PE)
> - ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> + kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
>
> - if (emulator_has_longmode(ctxt)) {
> +#ifdef CONFIG_X86_64
> + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
> /* Clear CR4.PAE before clearing EFER.LME. */
> - cr4 = ctxt->ops->get_cr(ctxt, 4);
> + cr4 = kvm_read_cr4(vcpu);
> if (cr4 & X86_CR4_PAE)
> - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> + kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);
>
> /* And finally go back to 32-bit mode. */
> efer = 0;
> - ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
> + kvm_set_msr(vcpu, MSR_EFER, efer);
> }
> +#endif
>
> /*
> * Give leave_smm() a chance to make ISA-specific changes to the vCPU
> @@ -580,7 +565,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> return X86EMUL_UNHANDLEABLE;
>
> #ifdef CONFIG_X86_64
> - if (emulator_has_longmode(ctxt))
> + if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
> return rsm_load_state_64(ctxt, buf);
> else
> #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 97d6ee179109..97a871635986 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7201,15 +7201,6 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
> return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
> }
>
> -static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
> - unsigned long addr, void *val, unsigned int bytes)
> -{
> - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
> - int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
> -
> - return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
> -}
> -
> static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
> struct kvm_vcpu *vcpu, u64 access,
> struct x86_exception *exception)
> @@ -8001,26 +7992,6 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
> return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
> }
>
> -static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
> - u32 msr_index, u64 data)
> -{
> - return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
> -}
> -
> -static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
> -{
> - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
> -
> - return vcpu->arch.smbase;
> -}
> -
> -static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
> -{
> - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
> -
> - vcpu->arch.smbase = smbase;
> -}
> -
> static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
> u32 pmc)
> {
> @@ -8119,7 +8090,6 @@ static const struct x86_emulate_ops emulate_ops = {
> .write_gpr = emulator_write_gpr,
> .read_std = emulator_read_std,
> .write_std = emulator_write_std,
> - .read_phys = kvm_read_guest_phys_system,
> .fetch = kvm_fetch_guest_virt,
> .read_emulated = emulator_read_emulated,
> .write_emulated = emulator_write_emulated,
> @@ -8139,11 +8109,8 @@ static const struct x86_emulate_ops emulate_ops = {
> .cpl = emulator_get_cpl,
> .get_dr = emulator_get_dr,
> .set_dr = emulator_set_dr,
> - .get_smbase = emulator_get_smbase,
> - .set_smbase = emulator_set_smbase,
> .set_msr_with_filter = emulator_set_msr_with_filter,
> .get_msr_with_filter = emulator_get_msr_with_filter,
> - .set_msr = emulator_set_msr,
> .get_msr = emulator_get_msr,
> .check_pmc = emulator_check_pmc,
> .read_pmc = emulator_read_pmc,

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky
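
The limit bug called out in the commit message above is easy to miss, so here
is a rough, self-contained model of it. old_rsm_path() and new_rsm_path() are
invented names for illustration; the actual fix is simply filling struct
kvm_segment with the byte-granular limit, as the patch does. The SMRAM save
area holds the full byte limit, so squeezing it back through a 20-bit
descriptor limit and then re-applying the G-bit scaling inflates it:

#include <stdio.h>
#include <stdint.h>

static uint32_t old_rsm_path(uint32_t smram_limit, int g_bit)
{
	uint32_t desc_limit = smram_limit & 0xfffff;	/* truncated to the 20-bit descriptor field */

	if (g_bit)					/* scaled by 4096 a second time */
		desc_limit = (desc_limit << 12) | 0xfff;
	return desc_limit;
}

static uint32_t new_rsm_path(uint32_t smram_limit)
{
	return smram_limit;	/* struct kvm_segment takes the byte-granular limit as-is */
}

int main(void)
{
	uint32_t saved = 0x003fffff;	/* a 4 MiB segment with G=1, saved byte-granular by enter_smm */

	printf("old RSM path: %#010x (wrongly inflated to 4 GiB)\n", (unsigned)old_rsm_path(saved, 1));
	printf("new RSM path: %#010x\n", (unsigned)new_rsm_path(saved));
	return 0;
}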



2022-10-24 19:44:13

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 7/8] KVM: x86: remove SMRAM address space if SMM is not supported

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> If CONFIG_KVM_SMM is not defined HF_SMM_MASK will always be zero, and
> we can spare userspace the hassle of setting up the SMRAM address space
> simply by reporting that only one address space is supported.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 13 ++++++++-----
> 1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index cb88da02d965..d11697504471 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1994,11 +1994,14 @@ enum {
> #define HF_SMM_MASK (1 << 6)
> #define HF_SMM_INSIDE_NMI_MASK (1 << 7)
>
> -#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
> -#define KVM_ADDRESS_SPACE_NUM 2
> -
> -#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
> -#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
> +#ifdef CONFIG_KVM_SMM
> +# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
> +# define KVM_ADDRESS_SPACE_NUM 2
> +# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
> +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
> +#else
> +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
> +#endif
>
> #define KVM_ARCH_WANT_MMU_NOTIFIER
>

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky
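
For context, the second address space only exists to give SMM its own set of
memslots, selected by the vCPU's HF_SMM_MASK; once that flag can never be set,
index 0 is always the right answer. A toy model of the indexing this patch
changes (DEMO_* and demo_* names are invented, not KVM code):

#include <stdio.h>

#define CONFIG_DEMO_SMM 0	/* stand-in for CONFIG_KVM_SMM */

#if CONFIG_DEMO_SMM
#define DEMO_ADDRESS_SPACE_NUM		2
#define demo_memslots_id(in_smm)	((in_smm) ? 1 : 0)
#else
#define DEMO_ADDRESS_SPACE_NUM		1
#define demo_memslots_id(in_smm)	0
#endif

static const char *slot_sets[2] = { "normal address space", "SMRAM address space" };

int main(void)
{
	printf("address spaces reported to userspace: %d\n", DEMO_ADDRESS_SPACE_NUM);
	/* even a nonzero SMM flag resolves to slot set 0 when SMM is compiled out */
	printf("vCPU with SMM flag set -> %s\n", slot_sets[demo_memslots_id(1)]);
	return 0;
}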

2022-10-24 22:31:02

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 5/8] KVM: allow compiling out SMM support

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> Some users of KVM implement the UEFI variable store through a paravirtual device
> that does not require the "SMM lockbox" component of edk2; allow them to
> compile out system management mode, which is not a full implementation
> especially in how it interacts with nested virtualization.
>
> Suggested-by: Sean Christopherson <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/Kconfig | 11 ++++++++++
> arch/x86/kvm/Makefile | 2 +-
> arch/x86/kvm/smm.h | 13 ++++++++++++
> arch/x86/kvm/svm/svm.c | 2 ++
> arch/x86/kvm/vmx/vmx.c | 2 ++
> arch/x86/kvm/x86.c | 21 +++++++++++++++++--
> tools/testing/selftests/kvm/x86_64/smm_test.c | 2 ++
> 7 files changed, 50 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index a107df22ffee..1679f9b4e96d 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -88,6 +88,17 @@ config KVM_INTEL
> To compile this as a module, choose M here: the module
> will be called kvm-intel.
>
> +config KVM_SMM
> + bool "System Management Mode emulation"
> + default y
> + depends on KVM
> + help
> + Provides support for KVM to emulate System Management Mode (SMM)
> + in virtual machines. This can be used by the virtual machine
> + firmware to implement UEFI secure boot.
> +
> + If unsure, say Y.
> +
> config X86_SGX_KVM
> bool "Software Guard eXtensions (SGX) Virtualization"
> depends on X86_SGX && KVM_INTEL
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index ec6f7656254b..6cf40f668277 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -20,7 +20,7 @@ endif
>
> kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
> kvm-$(CONFIG_KVM_XEN) += xen.o
> -kvm-y += smm.o
> +kvm-$(CONFIG_KVM_SMM) += smm.o
>
> kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
> vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> index b0602a92e511..4c699fee4492 100644
> --- a/arch/x86/kvm/smm.h
> +++ b/arch/x86/kvm/smm.h
> @@ -8,6 +8,7 @@
> #define PUT_SMSTATE(type, buf, offset, val) \
> *(type *)((buf) + (offset) - 0x7e00) = val
>
> +#ifdef CONFIG_KVM_SMM
> static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
> {
> kvm_make_request(KVM_REQ_SMI, vcpu);
> @@ -23,5 +24,17 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
> void enter_smm(struct kvm_vcpu *vcpu);
> int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
> void process_smi(struct kvm_vcpu *vcpu);
> +#else
> +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
> +static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
> +static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
> +static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
> +static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
> +
> +/*
> + * emulator_leave_smm is used as a function pointer, so the
> + * stub is defined in x86.c.

It took me a while to figure out this comment.
I think it might be worth removing it; I don't think it helps much.

Or maybe even #ifdef the .leave_smm out of emulate_ops and check for a NULL pointer
in em_rsm (which should just #UD at the very start of it anyway).

Besides this:

Reviewed-by: Maxim Levitsky <[email protected]>


Best regards,
Maxim Levitsky



> + */
> +#endif
>
> #endif
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 496ee7d1ae2f..6f7ceb35d2ff 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4150,6 +4150,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
> case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
> return false;
> case MSR_IA32_SMBASE:
> + if (!IS_ENABLED(CONFIG_KVM_SMM))
> + return false;
> /* SEV-ES guests do not support SMM, so report false */
> if (kvm && sev_es_guest(kvm))
> return false;
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 038809c68006..b22330a15adb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6841,6 +6841,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
> {
> switch (index) {
> case MSR_IA32_SMBASE:
> + if (!IS_ENABLED(CONFIG_KVM_SMM))
> + return false;
> /*
> * We cannot do SMM unless we can run the guest in big
> * real mode.
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 97a871635986..a9e050aefea6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3636,7 +3636,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> break;
> }
> case MSR_IA32_SMBASE:
> - if (!msr_info->host_initiated)
> + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
> return 1;
> vcpu->arch.smbase = data;
> break;
> @@ -4052,7 +4052,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = vcpu->arch.ia32_misc_enable_msr;
> break;
> case MSR_IA32_SMBASE:
> - if (!msr_info->host_initiated)
> + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
> return 1;
> msr_info->data = vcpu->arch.smbase;
> break;
> @@ -4426,6 +4426,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> r |= KVM_X86_DISABLE_EXITS_MWAIT;
> break;
> case KVM_CAP_X86_SMM:
> + if (!IS_ENABLED(CONFIG_KVM_SMM))
> + break;
> +
> /* SMBASE is usually relocated above 1M on modern chipsets,
> * and SMM handlers might indeed rely on 4G segment limits,
> * so do not report SMM to be available if real mode is
> @@ -5176,6 +5179,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
> vcpu->arch.apic->sipi_vector = events->sipi_vector;
>
> if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
> + if (!IS_ENABLED(CONFIG_KVM_SMM) &&
> + (events->smi.smm ||
> + events->smi.pending ||
> + events->smi.smm_inside_nmi))
> + return -EINVAL;
> +
> if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
> kvm_x86_ops.nested_ops->leave_nested(vcpu);
> kvm_smm_changed(vcpu, events->smi.smm);
> @@ -8066,6 +8075,14 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
> return emul_to_vcpu(ctxt)->arch.hflags;
> }
>
> +#ifndef CONFIG_KVM_SMM
> +static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
> +{
> + WARN_ON_ONCE(1);
> + return X86EMUL_UNHANDLEABLE;
> +}
> +#endif
> +
> static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
> {
> kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
> diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
> index 1f136a81858e..cb38a478e1f6 100644
> --- a/tools/testing/selftests/kvm/x86_64/smm_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
> @@ -137,6 +137,8 @@ int main(int argc, char *argv[])
> struct kvm_x86_state *state;
> int stage, stage_reported;
>
> + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
> +
> /* Create VM */
> vm = vm_create_with_one_vcpu(&vcpu, guest_code);
>
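
The stub arrangement in smm.h follows the usual kernel pattern: real
declarations when CONFIG_KVM_SMM is set, static inline stubs otherwise, so
callers such as x86.c compile unchanged in both configurations. A
self-contained sketch of that pattern, with invented demo_* names standing in
for kvm_inject_smi()/is_smm():

#include <stdio.h>
#include <errno.h>

#define CONFIG_DEMO_SMM 0	/* stand-in for CONFIG_KVM_SMM; flip to 1 */

#if CONFIG_DEMO_SMM
int demo_inject_smi(void);
int demo_is_smm(void);
#else
/* stubs: callers need no #ifdefs of their own */
static inline int demo_inject_smi(void) { return -ENOTTY; }
static inline int demo_is_smm(void)     { return 0; }
#endif

#if CONFIG_DEMO_SMM
/* in the real tree these bodies live in smm.c, built only when the option is set */
int demo_inject_smi(void) { return 0; }
int demo_is_smm(void)     { return 1; }
#endif

int main(void)
{
	/* the caller looks the same in both configurations */
	if (demo_inject_smi() == -ENOTTY)
		puts("SMM support compiled out");
	printf("in SMM: %d\n", demo_is_smm());
	return 0;
}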


2022-10-24 23:50:34

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2 2/8] KVM: x86: move SMM entry to a new file

On Thu, 2022-09-29 at 13:20 -0400, Paolo Bonzini wrote:
> Some users of KVM implement the UEFI variable store through a paravirtual
> device that does not require the "SMM lockbox" component of edk2, and
> would like to compile out system management mode. In preparation for
> that, move the SMM entry code out of x86.c and into a new file.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/smm.c | 235 +++++++++++++++++++++++++++++++
> arch/x86/kvm/smm.h | 1 +
> arch/x86/kvm/x86.c | 239 +-------------------------------
> 4 files changed, 239 insertions(+), 237 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0ca8f28854ab..b7d078cd768d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1838,6 +1838,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
> int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
>
> void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
> +void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
> int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
> void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
>
> diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
> index b91c48d91f6e..26a6859e421f 100644
> --- a/arch/x86/kvm/smm.c
> +++ b/arch/x86/kvm/smm.c
> @@ -5,6 +5,7 @@
> #include "kvm_cache_regs.h"
> #include "kvm_emulate.h"
> #include "smm.h"
> +#include "cpuid.h"
> #include "trace.h"
>
> void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
> @@ -35,3 +36,237 @@ void process_smi(struct kvm_vcpu *vcpu)
> vcpu->arch.smi_pending = true;
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> }
> +
> +static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
> +{
> + u32 flags = 0;
> + flags |= seg->g << 23;
> + flags |= seg->db << 22;
> + flags |= seg->l << 21;
> + flags |= seg->avl << 20;
> + flags |= seg->present << 15;
> + flags |= seg->dpl << 13;
> + flags |= seg->s << 12;
> + flags |= seg->type << 8;
> + return flags;
> +}
> +
> +static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
> +{
> + struct kvm_segment seg;
> + int offset;
> +
> + kvm_get_segment(vcpu, &seg, n);
> + PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
> +
> + if (n < 3)
> + offset = 0x7f84 + n * 12;
> + else
> + offset = 0x7f2c + (n - 3) * 12;
> +
> + PUT_SMSTATE(u32, buf, offset + 8, seg.base);
> + PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
> + PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
> +}
> +
> +#ifdef CONFIG_X86_64
> +static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
> +{
> + struct kvm_segment seg;
> + int offset;
> + u16 flags;
> +
> + kvm_get_segment(vcpu, &seg, n);
> + offset = 0x7e00 + n * 16;
> +
> + flags = enter_smm_get_segment_flags(&seg) >> 8;
> + PUT_SMSTATE(u16, buf, offset, seg.selector);
> + PUT_SMSTATE(u16, buf, offset + 2, flags);
> + PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
> + PUT_SMSTATE(u64, buf, offset + 8, seg.base);
> +}
> +#endif
> +
> +static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
> +{
> + struct desc_ptr dt;
> + struct kvm_segment seg;
> + unsigned long val;
> + int i;
> +
> + PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
> +
> + for (i = 0; i < 8; i++)
> + PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
> +
> + kvm_get_dr(vcpu, 6, &val);
> + PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
> + kvm_get_dr(vcpu, 7, &val);
> + PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
> +
> + kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> + PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
> + PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
> + PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
> + PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
> +
> + kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> + PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
> + PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
> + PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
> + PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
> +
> + static_call(kvm_x86_get_gdt)(vcpu, &dt);
> + PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
> + PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
> +
> + static_call(kvm_x86_get_idt)(vcpu, &dt);
> + PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
> + PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
> +
> + for (i = 0; i < 6; i++)
> + enter_smm_save_seg_32(vcpu, buf, i);
> +
> + PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
> +
> + /* revision id */
> + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
> + PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
> +}
> +
> +#ifdef CONFIG_X86_64
> +static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
> +{
> + struct desc_ptr dt;
> + struct kvm_segment seg;
> + unsigned long val;
> + int i;
> +
> + for (i = 0; i < 16; i++)
> + PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
> +
> + PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
> + PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
> +
> + kvm_get_dr(vcpu, 6, &val);
> + PUT_SMSTATE(u64, buf, 0x7f68, val);
> + kvm_get_dr(vcpu, 7, &val);
> + PUT_SMSTATE(u64, buf, 0x7f60, val);
> +
> + PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
> + PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
> + PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
> +
> + PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
> +
> + /* revision id */
> + PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
> +
> + PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
> +
> + kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> + PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
> + PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
> + PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
> + PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
> +
> + static_call(kvm_x86_get_idt)(vcpu, &dt);
> + PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
> + PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
> +
> + kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> + PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
> + PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
> + PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
> + PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
> +
> + static_call(kvm_x86_get_gdt)(vcpu, &dt);
> + PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
> + PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
> +
> + for (i = 0; i < 6; i++)
> + enter_smm_save_seg_64(vcpu, buf, i);
> +}
> +#endif
> +
> +void enter_smm(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_segment cs, ds;
> + struct desc_ptr dt;
> + unsigned long cr0;
> + char buf[512];
> +
> + memset(buf, 0, 512);
> +#ifdef CONFIG_X86_64
> + if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
> + enter_smm_save_state_64(vcpu, buf);
> + else
> +#endif
> + enter_smm_save_state_32(vcpu, buf);
> +
> + /*
> + * Give enter_smm() a chance to make ISA-specific changes to the vCPU
> + * state (e.g. leave guest mode) after we've saved the state into the
> + * SMM state-save area.
> + */
> + static_call(kvm_x86_enter_smm)(vcpu, buf);
> +
> + kvm_smm_changed(vcpu, true);
> + kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
> +
> + if (static_call(kvm_x86_get_nmi_mask)(vcpu))
> + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
> + else
> + static_call(kvm_x86_set_nmi_mask)(vcpu, true);
> +
> + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
> + kvm_rip_write(vcpu, 0x8000);
> +
> + cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
> + static_call(kvm_x86_set_cr0)(vcpu, cr0);
> + vcpu->arch.cr0 = cr0;
> +
> + static_call(kvm_x86_set_cr4)(vcpu, 0);
> +
> + /* Undocumented: IDT limit is set to zero on entry to SMM. */
> + dt.address = dt.size = 0;
> + static_call(kvm_x86_set_idt)(vcpu, &dt);
> +
> + kvm_set_dr(vcpu, 7, DR7_FIXED_1);
> +
> + cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
> + cs.base = vcpu->arch.smbase;
> +
> + ds.selector = 0;
> + ds.base = 0;
> +
> + cs.limit = ds.limit = 0xffffffff;
> + cs.type = ds.type = 0x3;
> + cs.dpl = ds.dpl = 0;
> + cs.db = ds.db = 0;
> + cs.s = ds.s = 1;
> + cs.l = ds.l = 0;
> + cs.g = ds.g = 1;
> + cs.avl = ds.avl = 0;
> + cs.present = ds.present = 1;
> + cs.unusable = ds.unusable = 0;
> + cs.padding = ds.padding = 0;
> +
> + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
> + kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
> + kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
> + kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
> + kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
> + kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
> +
> +#ifdef CONFIG_X86_64
> + if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
> + static_call(kvm_x86_set_efer)(vcpu, 0);
> +#endif
> +
> + kvm_update_cpuid_runtime(vcpu);
> + kvm_mmu_reset_context(vcpu);
> +}
> diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
> index d85d4ccd32dd..aacc6dac2c99 100644
> --- a/arch/x86/kvm/smm.h
> +++ b/arch/x86/kvm/smm.h
> @@ -20,6 +20,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
> }
>
> void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
> +void enter_smm(struct kvm_vcpu *vcpu);
> void process_smi(struct kvm_vcpu *vcpu);
>
> #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e0e461958c81..476b4a6e81ab 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -120,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
>
> static void update_cr8_intercept(struct kvm_vcpu *vcpu);
> static void process_nmi(struct kvm_vcpu *vcpu);
> -static void enter_smm(struct kvm_vcpu *vcpu);
> static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
> static void store_regs(struct kvm_vcpu *vcpu);
> static int sync_regs(struct kvm_vcpu *vcpu);
> @@ -7043,8 +7042,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
> return handled;
> }
>
> -static void kvm_set_segment(struct kvm_vcpu *vcpu,
> - struct kvm_segment *var, int seg)
> +void kvm_set_segment(struct kvm_vcpu *vcpu,
> + struct kvm_segment *var, int seg)
> {
> static_call(kvm_x86_set_segment)(vcpu, var, seg);
> }
> @@ -9968,240 +9967,6 @@ static void process_nmi(struct kvm_vcpu *vcpu)
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> }
>
> -static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
> -{
> - u32 flags = 0;
> - flags |= seg->g << 23;
> - flags |= seg->db << 22;
> - flags |= seg->l << 21;
> - flags |= seg->avl << 20;
> - flags |= seg->present << 15;
> - flags |= seg->dpl << 13;
> - flags |= seg->s << 12;
> - flags |= seg->type << 8;
> - return flags;
> -}
> -
> -static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
> -{
> - struct kvm_segment seg;
> - int offset;
> -
> - kvm_get_segment(vcpu, &seg, n);
> - PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
> -
> - if (n < 3)
> - offset = 0x7f84 + n * 12;
> - else
> - offset = 0x7f2c + (n - 3) * 12;
> -
> - PUT_SMSTATE(u32, buf, offset + 8, seg.base);
> - PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
> - PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
> -}
> -
> -#ifdef CONFIG_X86_64
> -static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
> -{
> - struct kvm_segment seg;
> - int offset;
> - u16 flags;
> -
> - kvm_get_segment(vcpu, &seg, n);
> - offset = 0x7e00 + n * 16;
> -
> - flags = enter_smm_get_segment_flags(&seg) >> 8;
> - PUT_SMSTATE(u16, buf, offset, seg.selector);
> - PUT_SMSTATE(u16, buf, offset + 2, flags);
> - PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
> - PUT_SMSTATE(u64, buf, offset + 8, seg.base);
> -}
> -#endif
> -
> -static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
> -{
> - struct desc_ptr dt;
> - struct kvm_segment seg;
> - unsigned long val;
> - int i;
> -
> - PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
> - PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
> - PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
> - PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
> -
> - for (i = 0; i < 8; i++)
> - PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
> -
> - kvm_get_dr(vcpu, 6, &val);
> - PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
> - kvm_get_dr(vcpu, 7, &val);
> - PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
> -
> - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> - PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
> - PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
> - PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
> - PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
> -
> - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> - PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
> - PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
> - PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
> - PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
> -
> - static_call(kvm_x86_get_gdt)(vcpu, &dt);
> - PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
> - PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
> -
> - static_call(kvm_x86_get_idt)(vcpu, &dt);
> - PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
> - PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
> -
> - for (i = 0; i < 6; i++)
> - enter_smm_save_seg_32(vcpu, buf, i);
> -
> - PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
> -
> - /* revision id */
> - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
> - PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
> -}
> -
> -#ifdef CONFIG_X86_64
> -static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
> -{
> - struct desc_ptr dt;
> - struct kvm_segment seg;
> - unsigned long val;
> - int i;
> -
> - for (i = 0; i < 16; i++)
> - PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
> -
> - PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
> - PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
> -
> - kvm_get_dr(vcpu, 6, &val);
> - PUT_SMSTATE(u64, buf, 0x7f68, val);
> - kvm_get_dr(vcpu, 7, &val);
> - PUT_SMSTATE(u64, buf, 0x7f60, val);
> -
> - PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
> - PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
> - PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
> -
> - PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
> -
> - /* revision id */
> - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
> -
> - PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
> -
> - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> - PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
> - PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
> - PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
> - PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
> -
> - static_call(kvm_x86_get_idt)(vcpu, &dt);
> - PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
> - PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
> -
> - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> - PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
> - PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
> - PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
> - PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
> -
> - static_call(kvm_x86_get_gdt)(vcpu, &dt);
> - PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
> - PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
> -
> - for (i = 0; i < 6; i++)
> - enter_smm_save_seg_64(vcpu, buf, i);
> -}
> -#endif
> -
> -static void enter_smm(struct kvm_vcpu *vcpu)
> -{
> - struct kvm_segment cs, ds;
> - struct desc_ptr dt;
> - unsigned long cr0;
> - char buf[512];
> -
> - memset(buf, 0, 512);
> -#ifdef CONFIG_X86_64
> - if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
> - enter_smm_save_state_64(vcpu, buf);
> - else
> -#endif
> - enter_smm_save_state_32(vcpu, buf);
> -
> - /*
> - * Give enter_smm() a chance to make ISA-specific changes to the vCPU
> - * state (e.g. leave guest mode) after we've saved the state into the
> - * SMM state-save area.
> - */
> - static_call(kvm_x86_enter_smm)(vcpu, buf);
> -
> - kvm_smm_changed(vcpu, true);
> - kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
> -
> - if (static_call(kvm_x86_get_nmi_mask)(vcpu))
> - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
> - else
> - static_call(kvm_x86_set_nmi_mask)(vcpu, true);
> -
> - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
> - kvm_rip_write(vcpu, 0x8000);
> -
> - cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
> - static_call(kvm_x86_set_cr0)(vcpu, cr0);
> - vcpu->arch.cr0 = cr0;
> -
> - static_call(kvm_x86_set_cr4)(vcpu, 0);
> -
> - /* Undocumented: IDT limit is set to zero on entry to SMM. */
> - dt.address = dt.size = 0;
> - static_call(kvm_x86_set_idt)(vcpu, &dt);
> -
> - kvm_set_dr(vcpu, 7, DR7_FIXED_1);
> -
> - cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
> - cs.base = vcpu->arch.smbase;
> -
> - ds.selector = 0;
> - ds.base = 0;
> -
> - cs.limit = ds.limit = 0xffffffff;
> - cs.type = ds.type = 0x3;
> - cs.dpl = ds.dpl = 0;
> - cs.db = ds.db = 0;
> - cs.s = ds.s = 1;
> - cs.l = ds.l = 0;
> - cs.g = ds.g = 1;
> - cs.avl = ds.avl = 0;
> - cs.present = ds.present = 1;
> - cs.unusable = ds.unusable = 0;
> - cs.padding = ds.padding = 0;
> -
> - kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
> - kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
> - kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
> - kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
> - kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
> - kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
> -
> -#ifdef CONFIG_X86_64
> - if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
> - static_call(kvm_x86_set_efer)(vcpu, 0);
> -#endif
> -
> - kvm_update_cpuid_runtime(vcpu);
> - kvm_mmu_reset_context(vcpu);
> -}
> -
> void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
> unsigned long *vcpu_bitmap)
> {


Looks OK to me, but I might have missed something.

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky
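
To make the descriptor-flag shuffling in enter_smm_get_segment_flags() easier
to follow, here is a stand-alone round-trip of the 32-bit SMRAM "flags" word.
struct demo_seg is an invented stand-in for the handful of struct kvm_segment
fields involved; the bit positions are copied from the quoted code, and the
64-bit save area stores only the upper 16 bits of this word, hence the ">> 8"
in the 64-bit helpers.

#include <stdio.h>
#include <stdint.h>

struct demo_seg {
	unsigned g, db, l, avl, present, dpl, s, type;
};

static uint32_t pack_flags(const struct demo_seg *seg)
{
	return (seg->g << 23) | (seg->db << 22) | (seg->l << 21) |
	       (seg->avl << 20) | (seg->present << 15) | (seg->dpl << 13) |
	       (seg->s << 12) | (seg->type << 8);
}

static void unpack_flags(struct demo_seg *seg, uint32_t flags)
{
	seg->g       = (flags >> 23) & 1;
	seg->db      = (flags >> 22) & 1;
	seg->l       = (flags >> 21) & 1;
	seg->avl     = (flags >> 20) & 1;
	seg->present = (flags >> 15) & 1;
	seg->dpl     = (flags >> 13) & 3;
	seg->s       = (flags >> 12) & 1;
	seg->type    = (flags >> 8) & 15;
}

int main(void)
{
	/* same field values as the CS that enter_smm() builds in the hunk above */
	struct demo_seg cs = { .g = 1, .db = 0, .l = 0, .avl = 0,
			       .present = 1, .dpl = 0, .s = 1, .type = 0x3 };
	struct demo_seg back;
	uint32_t flags = pack_flags(&cs);

	unpack_flags(&back, flags);
	printf("flags = %#x, type after round-trip = %#x\n",
	       (unsigned)flags, back.type);
	return 0;
}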