Turn off host updates to the registered kvmclock memory
locations when transitioning to a hibernated kernel in
resume_target_kernel().
This is accomplished for secondary vcpus by disabling host
clock updates for that vcpu when it is put offline. For the
primary vcpu, it's accomplished by using the existing call back
from save_processor_state() to kvm_save_sched_clock_state().
The registered kvmclock memory locations may differ between
the currently running kernel and the hibernated kernel, which
is being restored and resumed. Kernel memory corruption is thus
possible if the host clock updates are allowed to run while the
hibernated kernel is relocated to its original physical memory
locations.
This is similar to the problem solved for kexec by
commit 1e977aa12dd4 ("x86: KVM guest: disable clock before rebooting.")
Commit 95a3d4454bb1 ("x86/kvmclock: Switch kvmclock data to a
PER_CPU variable") innocently increased the exposure for this
problem by dynamically allocating the physical pages that are
used for host clock updates when the vcpu count exceeds 64.
This increases the likelihood that the registered kvmclock
locations will differ for vcpus above 64.
Reported-by: Xiaoyi Chen <[email protected]>
Tested-by: Mohamed Aboubakr <[email protected]>
Signed-off-by: Lenny Szubowicz <[email protected]>
---
arch/x86/kernel/kvmclock.c | 40 ++++++++++++++++++++++++++++++++++----
1 file changed, 36 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1fc0962c89c0..0d39906b9df0 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -187,8 +187,17 @@ static void kvm_register_clock(char *txt)
pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
}
+/*
+ * Turn off host clock updates to the registered memory location when the
+ * cpu clock context is saved via save_processor_state(). Enables correct
+ * handling of the primary cpu clock when transitioning to a hibernated
+ * kernel in resume_target_kernel(), where the old and new registered
+ * memory locations may differ.
+ */
static void kvm_save_sched_clock_state(void)
{
+ native_write_msr(msr_kvm_system_time, 0, 0);
+ kvm_disable_steal_time();
}
static void kvm_restore_sched_clock_state(void)
@@ -310,9 +319,22 @@ static int kvmclock_setup_percpu(unsigned int cpu)
return p ? 0 : -ENOMEM;
}
+/*
+ * Turn off host clock updates to the registered memory location when a
+ * cpu is placed offline. Enables correct handling of secondary cpu clocks
+ * when transitioning to a hibernated kernel in resume_target_kernel(),
+ * where the old and new registered memory locations may differ.
+ */
+static int kvmclock_cpu_offline(unsigned int cpu)
+{
+ native_write_msr(msr_kvm_system_time, 0, 0);
+ return 0;
+}
+
void __init kvmclock_init(void)
{
u8 flags;
+ int cpuhp_prepare;
if (!kvm_para_available() || !kvmclock)
return;
@@ -324,10 +346,14 @@ void __init kvmclock_init(void)
return;
}
- if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu",
- kvmclock_setup_percpu, NULL) < 0) {
- return;
- }
+ cpuhp_prepare = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN,
+ "kvmclock:setup_percpu",
+ kvmclock_setup_percpu, NULL);
+ if (cpuhp_prepare < 0)
+ goto cpuhp_setup_err1;
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "kvmclock:cpu_offline",
+ NULL, kvmclock_cpu_offline) < 0)
+ goto cpuhp_setup_err2;
pr_info("kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
@@ -372,4 +398,10 @@ void __init kvmclock_init(void)
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
pv_info.name = "KVM";
+ return;
+
+cpuhp_setup_err2:
+ cpuhp_remove_state(cpuhp_prepare);
+cpuhp_setup_err1:
+ pr_err("kvmclock: Init failed; error from cpu state notifier registration\n");
}
--
2.27.0
Hi Paolo,
On Thu, Mar 25, 2021 at 10:41:43PM -0400, Lenny Szubowicz wrote:
> Reported-by: Xiaoyi Chen <[email protected]>
> Tested-by: Mohamed Aboubakr <[email protected]>
> Signed-off-by: Lenny Szubowicz <[email protected]>
> ---
> arch/x86/kernel/kvmclock.c | 40 ++++++++++++++++++++++++++++++++++----
> 1 file changed, 36 insertions(+), 4 deletions(-)
What is the status of this patch? Are there any objections?
Regards,
Joerg
On 08/07/21 09:59, Joerg Roedel wrote:
> Hi Paolo,
>
> On Thu, Mar 25, 2021 at 10:41:43PM -0400, Lenny Szubowicz wrote:
>> Reported-by: Xiaoyi Chen <[email protected]>
>> Tested-by: Mohamed Aboubakr <[email protected]>
>> Signed-off-by: Lenny Szubowicz <[email protected]>
>> ---
>> arch/x86/kernel/kvmclock.c | 40 ++++++++++++++++++++++++++++++++++----
>> 1 file changed, 36 insertions(+), 4 deletions(-)
>
> What is the status of this patch? Are there any objections?
It was replaced by these:
0a269a008f83 x86/kvm: Fix pr_info() for async PF setup/teardown
8b79feffeca2 x86/kvm: Teardown PV features on boot CPU as well
c02027b5742b x86/kvm: Disable kvmclock on all CPUs on shutdown
3d6b84132d2a x86/kvm: Disable all PV features on crash
384fc672f528 x86/kvm: Unify kvm_pv_guest_cpu_reboot() with kvm_guest_cpu_offline()
Thanks,
Paolo