Currently, kdump just makes all the logical processors leave VMX operation by
executing the VMXOFF instruction, so any VMCSs active on the logical processors
may be corrupted. But sometimes we need the VMCSs to debug guest images contained
in the host vmcore. To prevent the corruption, we should VMCLEAR the VMCSs before
executing the VMXOFF instruction.
This patch set provides a way to VMCLEAR the VMCSs related to guests on all cpus
before executing VMXOFF when doing kdump. This ensures that the VMCSs in the
vmcore are up to date and not corrupted.
Changelog from v3 to v4:
1. add a new percpu variable vmclear_skipped to skip
vmclear in kdump in some conditions.
Changelog from v2 to v3:
1. remove unnecessary conditions in function
cpu_emergency_clear_loaded_vmcss as Marcelo suggested.
Changelog from v1 to v2:
1. remove the sysctl and clear VMCSs unconditionally.
Zhang Yanfei (2):
x86/kexec: VMCLEAR vmcss on all cpus if necessary
KVM: set/unset crash_clear_loaded_vmcss and vmclear_skipped in
kvm_intel module
arch/x86/include/asm/kexec.h | 3 +++
arch/x86/kernel/crash.c | 32 ++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 67 insertions(+), 0 deletions(-)
The function pointer crash_clear_loaded_vmcss is added to VMCLEAR the VMCSs
loaded on all cpus. It is set to a valid function when the kvm_intel module
is loaded.
The percpu variable vmclear_skipped is added to flag the case where the
loaded_vmcss_on_cpu list is being modified at the moment the machine crashes
and kdump runs; in that case the vmclear here is skipped.
Signed-off-by: Zhang Yanfei <[email protected]>
---
arch/x86/include/asm/kexec.h | 3 +++
arch/x86/kernel/crash.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 317ff17..d892211 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -163,6 +163,9 @@ struct kimage_arch {
};
#endif
+extern void (*crash_clear_loaded_vmcss)(void);
+DECLARE_PER_CPU(int, vmclear_skipped);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 13ad899..b9f264e 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
+#include <linux/module.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -30,6 +31,27 @@
int in_crash_kexec;
+/*
+ * This is used to VMCLEAR vmcss loaded on all
+ * cpus. And when loading kvm_intel module, the
+ * function pointer will be made valid.
+ */
+void (*crash_clear_loaded_vmcss)(void) = NULL;
+EXPORT_SYMBOL_GPL(crash_clear_loaded_vmcss);
+
+DEFINE_PER_CPU(int, vmclear_skipped) = 1;
+EXPORT_SYMBOL_GPL(vmclear_skipped);
+
+static void cpu_emergency_clear_loaded_vmcss(void)
+{
+ int cpu = raw_smp_processor_id();
+ int skipped;
+
+ skipped = per_cpu(vmclear_skipped, cpu);
+ if (!skipped && crash_clear_loaded_vmcss)
+ crash_clear_loaded_vmcss();
+}
+
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#endif
crash_save_cpu(regs, cpu);
+ /*
+ * VMCLEAR vmcss loaded on all cpus if needed.
+ */
+ cpu_emergency_clear_loaded_vmcss();
+
/* Disable VMX or SVM if needed.
*
* We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
kdump_nmi_shootdown_cpus();
+ /*
+ * VMCLEAR vmcss loaded on this cpu if needed.
+ */
+ cpu_emergency_clear_loaded_vmcss();
+
/* Booting kdump kernel with VMX or SVM enabled won't work,
* because (among other limitations) we can't disable paging
* with the virt flags.
--
1.7.1
Signed-off-by: Zhang Yanfei <[email protected]>
---
arch/x86/kvm/vmx.c | 32 ++++++++++++++++++++++++++++++++
1 files changed, 32 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4ff0ab9..029ec7b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -41,6 +41,7 @@
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/perf_event.h>
+#include <asm/kexec.h>
#include "trace.h"
@@ -963,6 +964,20 @@ static void vmcs_load(struct vmcs *vmcs)
vmcs, phys_addr);
}
+static inline void enable_vmclear_in_kdump(int cpu)
+{
+#ifdef CONFIG_KEXEC
+ per_cpu(vmclear_skipped, cpu) = 0;
+#endif
+}
+
+static inline void disable_vmclear_in_kdump(int cpu)
+{
+#ifdef CONFIG_KEXEC
+ per_cpu(vmclear_skipped, cpu) = 1;
+#endif
+}
+
static void __loaded_vmcs_clear(void *arg)
{
struct loaded_vmcs *loaded_vmcs = arg;
@@ -972,8 +987,10 @@ static void __loaded_vmcs_clear(void *arg)
return; /* vcpu migration can race with cpu offline */
if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
+ disable_vmclear_in_kdump(cpu);
list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
loaded_vmcs_init(loaded_vmcs);
+ enable_vmclear_in_kdump(cpu);
}
static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
@@ -1491,8 +1508,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
local_irq_disable();
+ disable_vmclear_in_kdump(cpu);
list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
&per_cpu(loaded_vmcss_on_cpu, cpu));
+ enable_vmclear_in_kdump(cpu);
local_irq_enable();
/*
@@ -2302,6 +2321,9 @@ static int hardware_enable(void *garbage)
return -EBUSY;
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+ enable_vmclear_in_kdump(cpu);
+
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
test_bits = FEATURE_CONTROL_LOCKED;
@@ -2333,6 +2355,8 @@ static void vmclear_local_loaded_vmcss(void)
list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
loaded_vmcss_on_cpu_link)
__loaded_vmcs_clear(v);
+
+ disable_vmclear_in_kdump(cpu);
}
@@ -7230,6 +7254,10 @@ static int __init vmx_init(void)
if (r)
goto out3;
+#ifdef CONFIG_KEXEC
+ crash_clear_loaded_vmcss = vmclear_local_loaded_vmcss;
+#endif
+
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7265,6 +7293,10 @@ static void __exit vmx_exit(void)
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
+#ifdef CONFIG_KEXEC
+ crash_clear_loaded_vmcss = NULL;
+#endif
+
kvm_exit();
}
--
1.7.1
Hello Marcelo,
Any thoughts?
于 2012年11月14日 17:29, zhangyanfei 写道:
> Currently, kdump just makes all the logical processors leave VMX operation by
> executing VMXOFF instruction, so any VMCSs active on the logical processors may
> be corrupted. But, sometimes, we need the VMCSs to debug guest images contained
> in the host vmcore. To prevent the corruption, we should VMCLEAR the VMCSs before
> executing the VMXOFF instruction.
>
> The patch set provides a way to VMCLEAR vmcss related to guests on all cpus before
> executing the VMXOFF when doing kdump. This is used to ensure the VMCSs in the
> vmcore updated and non-corrupted.
>
> Changelog from v3 to v4:
> 1. add a new percpu variable vmclear_skipped to skip
> vmclear in kdump in some conditions.
>
> Changelog from v2 to v3:
> 1. remove unnecessary conditions in function
> cpu_emergency_clear_loaded_vmcss as Marcelo suggested.
>
> Changelog from v1 to v2:
> 1. remove the sysctl and clear VMCSs unconditionally.
>
> Zhang Yanfei (2):
> x86/kexec: VMCLEAR vmcss on all cpus if necessary
> KVM: set/unset crash_clear_loaded_vmcss and vmclear_skipped in
> kvm_intel module
>
> arch/x86/include/asm/kexec.h | 3 +++
> arch/x86/kernel/crash.c | 32 ++++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx.c | 32 ++++++++++++++++++++++++++++++++
> 3 files changed, 67 insertions(+), 0 deletions(-)
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
On Fri, Nov 16, 2012 at 06:12:58PM +0800, zhangyanfei wrote:
> Hello Marcelo,
>
> Any thoughts?
I thought a function call was OK, but it's better to have all code in
vmx.c. Please have an atomic notifier in kexec.c (registered by KVM
module via atomic_notifier_chain_register etc).
Other than that, which is largely cosmetic, it looks fine.
Sorry for not expressing this earlier.
> 于 2012年11月14日 17:29, zhangyanfei 写道:
> > Currently, kdump just makes all the logical processors leave VMX operation by
> > executing VMXOFF instruction, so any VMCSs active on the logical processors may
> > be corrupted. But, sometimes, we need the VMCSs to debug guest images contained
> > in the host vmcore. To prevent the corruption, we should VMCLEAR the VMCSs before
> > executing the VMXOFF instruction.
> >
> > The patch set provides a way to VMCLEAR vmcss related to guests on all cpus before
> > executing the VMXOFF when doing kdump. This is used to ensure the VMCSs in the
> > vmcore updated and non-corrupted.
> >
> > Changelog from v3 to v4:
> > 1. add a new percpu variable vmclear_skipped to skip
> > vmclear in kdump in some conditions.
> >
> > Changelog from v2 to v3:
> > 1. remove unnecessary conditions in function
> > cpu_emergency_clear_loaded_vmcss as Marcelo suggested.
> >
> > Changelog from v1 to v2:
> > 1. remove the sysctl and clear VMCSs unconditionally.
> >
> > Zhang Yanfei (2):
> > x86/kexec: VMCLEAR vmcss on all cpus if necessary
> > KVM: set/unset crash_clear_loaded_vmcss and vmclear_skipped in
> > kvm_intel module
> >
> > arch/x86/include/asm/kexec.h | 3 +++
> > arch/x86/kernel/crash.c | 32 ++++++++++++++++++++++++++++++++
> > arch/x86/kvm/vmx.c | 32 ++++++++++++++++++++++++++++++++
> > 3 files changed, 67 insertions(+), 0 deletions(-)
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/
> >
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
于 2012年11月20日 08:32, Marcelo Tosatti 写道:
> On Fri, Nov 16, 2012 at 06:12:58PM +0800, zhangyanfei wrote:
>> Hello Marcelo,
>>
>> Any thoughts?
>
> I thought a function call was OK, but its better to have all code in
> vmx.c. Please have an atomic notifier in kexec.c (registered by KVM
> module via atomic_notifier_chain_register etc).
>
> Other than that, which is largely cosmetic, it looks fine.
> Sorry for not expressing this earlier.
>
>
Hmm, Thanks. I will resend a new patch set.
Thanks
Zhang