Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753855AbdLHKwE (ORCPT ); Fri, 8 Dec 2017 05:52:04 -0500 Received: from mx1.redhat.com ([209.132.183.28]:52058 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753474AbdLHKub (ORCPT ); Fri, 8 Dec 2017 05:50:31 -0500 From: Vitaly Kuznetsov To: kvm@vger.kernel.org, x86@kernel.org Cc: Paolo Bonzini , =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= , Thomas Gleixner , Ingo Molnar , "H. Peter Anvin" , "K. Y. Srinivasan" , Haiyang Zhang , Stephen Hemminger , "Michael Kelley (EOSG)" , Andy Lutomirski , Mohammed Gamal , Cathy Avery , linux-kernel@vger.kernel.org, devel@linuxdriverproject.org Subject: [PATCH 5/6] x86/kvm: pass stable clocksource to guests when running nested on Hyper-V Date: Fri, 8 Dec 2017 11:49:59 +0100 Message-Id: <20171208105000.25116-6-vkuznets@redhat.com> In-Reply-To: <20171208105000.25116-1-vkuznets@redhat.com> References: <20171208105000.25116-1-vkuznets@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Fri, 08 Dec 2017 10:50:31 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8088 Lines: 244 Currently, KVM is able to work in 'masterclock' mode passing PVCLOCK_TSC_STABLE_BIT to guests when the clocksource we use on the host is TSC. When running nested on Hyper-V we normally use a different one: TSC page which is resistant to TSC frequency changes on event like L1 migration. Add support for it in KVM. The only non-trivial change in the patch is in vgettsc(): when updating our gtod copy we now need to get both the clockread and tsc value. Signed-off-by: Vitaly Kuznetsov --- RFC -> v1: - 'gtod_cs_mode_good' -> 'gtod_is_based_on_tsc' [Radim Krčmář] - rewrite vgettsc() to handle -1 from hv_read_tsc_page_tsc() [Radim Krčmář] - cycle_now -> tsc_timestamp [Radim Krčmář] --- arch/x86/kvm/x86.c | 93 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eee8e7faf1af..96e04a0cb921 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -67,6 +67,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -1377,6 +1378,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) return tsc; } +static inline int gtod_is_based_on_tsc(int mode) +{ + return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK; +} + static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 @@ -1396,7 +1402,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) * perform request to enable masterclock. */ if (ka->use_master_clock || - (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) + (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, @@ -1459,6 +1465,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) vcpu->arch.tsc_offset = offset; } +static inline bool kvm_check_tsc_unstable(void) +{ +#ifdef CONFIG_X86_64 + /* + * TSC is marked unstable when we're running on Hyper-V, + * 'TSC page' clocksource is good. + */ + if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK) + return false; +#endif + return check_tsc_unstable(); +} + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1504,7 +1523,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) */ if (synchronizing && vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { - if (!check_tsc_unstable()) { + if (!kvm_check_tsc_unstable()) { offset = kvm->arch.cur_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { @@ -1604,18 +1623,43 @@ static u64 read_tsc(void) return last; } -static inline u64 vgettsc(u64 *cycle_now) +static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) { long v; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; + u64 tsc_pg_val; + + switch (gtod->clock.vclock_mode) { + case VCLOCK_HVCLOCK: + tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), + tsc_timestamp); + if (tsc_pg_val != U64_MAX) { + /* TSC page valid */ + *mode = VCLOCK_HVCLOCK; + v = (tsc_pg_val - gtod->clock.cycle_last) & + gtod->clock.mask; + } else { + /* TSC page invalid */ + *mode = VCLOCK_NONE; + } + break; + case VCLOCK_TSC: + *mode = VCLOCK_TSC; + *tsc_timestamp = read_tsc(); + v = (*tsc_timestamp - gtod->clock.cycle_last) & + gtod->clock.mask; + break; + default: + *mode = VCLOCK_NONE; + } - *cycle_now = read_tsc(); + if (*mode == VCLOCK_NONE) + *tsc_timestamp = v = 0; - v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask; return v * gtod->clock.mult; } -static int do_monotonic_boot(s64 *t, u64 *cycle_now) +static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -1624,9 +1668,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) do { seq = read_seqcount_begin(>od->seq); - mode = gtod->clock.vclock_mode; ns = gtod->nsec_base; - ns += vgettsc(cycle_now); + ns += vgettsc(tsc_timestamp, &mode); ns >>= gtod->clock.shift; ns += gtod->boot_ns; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -1635,7 +1678,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) return mode; } -static int do_realtime(struct timespec *ts, u64 *cycle_now) +static int do_realtime(struct timespec *ts, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -1644,10 +1687,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now) do { seq = read_seqcount_begin(>od->seq); - mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->nsec_base; - ns += vgettsc(cycle_now); + ns += vgettsc(tsc_timestamp, &mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -1657,25 +1699,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now) return mode; } -/* returns true if host is using tsc clocksource */ -static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) +/* returns true if host is using TSC based clocksource */ +static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) { /* checked again under seqlock below */ - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; + return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, + tsc_timestamp)); } -/* returns true if host is using tsc clocksource */ +/* returns true if host is using TSC based clocksource */ static bool kvm_get_walltime_and_clockread(struct timespec *ts, - u64 *cycle_now) + u64 *tsc_timestamp) { /* checked again under seqlock below */ - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return do_realtime(ts, cycle_now) == VCLOCK_TSC; + return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp)); } #endif @@ -2869,13 +2912,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } - if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { + if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (check_tsc_unstable()) { + if (kvm_check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_vcpu_write_tsc_offset(vcpu, offset); @@ -6124,9 +6167,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); /* disable master clock if host does not trust, or does not - * use, TSC clocksource + * use, TSC based clocksource. */ - if (gtod->clock.vclock_mode != VCLOCK_TSC && + if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) queue_work(system_long_wq, &pvclock_gtod_work); @@ -7749,7 +7792,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) + if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); @@ -7903,7 +7946,7 @@ int kvm_arch_hardware_enable(void) return ret; local_tsc = rdtsc(); - stable = !check_tsc_unstable(); + stable = !kvm_check_tsc_unstable(); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (!stable && vcpu->cpu == smp_processor_id()) -- 2.14.3