2010-08-20 08:08:37

by Zachary Amsden

[permalink] [raw]
Subject: [KVM timekeeping 11/35] Add helper functions for time computation

Add a helper function to compute the kernel time and convert nanoseconds
back to CPU specific cycles. Note that these must not be called in preemptible
context, as that would mean the kernel could enter software suspend state,
which would cause non-atomic operation.

Also, convert the KVM_SET_CLOCK / KVM_GET_CLOCK ioctls to use the kernel
time helper; these should be bootbased as well.

Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/x86.c | 48 ++++++++++++++++++++++++++++--------------------
1 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 52b6c21..52680f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -893,6 +893,16 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
hv_clock->tsc_to_system_mul);
}

+static inline u64 get_kernel_ns(void)
+{
+ struct timespec ts;
+
+ WARN_ON(preemptible());
+ ktime_get_ts(&ts);
+ monotonic_to_bootbased(&ts);
+ return timespec_to_ns(&ts);
+}
+
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);

static inline int kvm_tsc_changes_freq(void)
@@ -904,18 +914,24 @@ static inline int kvm_tsc_changes_freq(void)
return ret;
}

+static inline u64 nsec_to_cycles(u64 nsec)
+{
+ WARN_ON(preemptible());
+ if (kvm_tsc_changes_freq())
+ printk_once(KERN_WARNING
+ "kvm: unreliable cycle conversion on adjustable rate TSC\n");
+ return (nsec * __get_cpu_var(cpu_tsc_khz)) / USEC_PER_SEC;
+}
+
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- struct timespec ts;

spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = data - native_read_tsc();
- ktime_get_ts(&ts);
- monotonic_to_bootbased(&ts);
- ns = timespec_to_ns(&ts);
+ ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;

/*
@@ -931,10 +947,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
offset = kvm->arch.last_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
- u64 tsc_delta = elapsed * __get_cpu_var(cpu_tsc_khz);
- tsc_delta = tsc_delta / USEC_PER_SEC;
- offset += tsc_delta;
- pr_debug("kvm: adjusted tsc offset by %llu\n", tsc_delta);
+ u64 delta = nsec_to_cycles(elapsed);
+ offset += delta;
+ pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
ns = kvm->arch.last_tsc_nsec;
}
@@ -951,11 +966,11 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);

static int kvm_write_guest_time(struct kvm_vcpu *v)
{
- struct timespec ts;
unsigned long flags;
struct kvm_vcpu_arch *vcpu = &v->arch;
void *shared_kaddr;
unsigned long this_tsc_khz;
+ s64 kernel_ns;

if ((!vcpu->time_page))
return 0;
@@ -963,8 +978,7 @@ static int kvm_write_guest_time(struct kvm_vcpu *v)
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
- ktime_get_ts(&ts);
- monotonic_to_bootbased(&ts);
+ kernel_ns = get_kernel_ns();
this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
local_irq_restore(flags);

@@ -979,9 +993,7 @@ static int kvm_write_guest_time(struct kvm_vcpu *v)
}

/* With all the info we got, fill in the values */
- vcpu->hv_clock.system_time = ts.tv_nsec +
- (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
-
+ vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->hv_clock.flags = 0;

/*
@@ -3263,7 +3275,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
case KVM_SET_CLOCK: {
- struct timespec now;
struct kvm_clock_data user_ns;
u64 now_ns;
s64 delta;
@@ -3277,19 +3288,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;

r = 0;
- ktime_get_ts(&now);
- now_ns = timespec_to_ns(&now);
+ now_ns = get_kernel_ns();
delta = user_ns.clock - now_ns;
kvm->arch.kvmclock_offset = delta;
break;
}
case KVM_GET_CLOCK: {
- struct timespec now;
struct kvm_clock_data user_ns;
u64 now_ns;

- ktime_get_ts(&now);
- now_ns = timespec_to_ns(&now);
+ now_ns = get_kernel_ns();
user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
user_ns.flags = 0;

--
1.7.1


2010-08-20 17:34:51

by Glauber Costa

[permalink] [raw]
Subject: Re: [KVM timekeeping 11/35] Add helper functions for time computation

On Thu, Aug 19, 2010 at 10:07:25PM -1000, Zachary Amsden wrote:
> Add a helper function to compute the kernel time and convert nanoseconds
> back to CPU specific cycles. Note that these must not be called in preemptible
> context, as that would mean the kernel could enter software suspend state,
> which would cause non-atomic operation.
>
> Also, convert the KVM_SET_CLOCK / KVM_GET_CLOCK ioctls to use the kernel
> time helper, these should be bootbased as well.
This is one of the things I believe should be applied right now.
Maybe we want a cut version of this patch that exposes this API while
adjusting KVM_SET_CLOCK / KVM_GET_CLOCK, to get in early rather than late?

2010-08-24 00:59:01

by Zachary Amsden

[permalink] [raw]
Subject: Re: [KVM timekeeping 11/35] Add helper functions for time computation

On 08/20/2010 07:34 AM, Glauber Costa wrote:
> On Thu, Aug 19, 2010 at 10:07:25PM -1000, Zachary Amsden wrote:
>
>> Add a helper function to compute the kernel time and convert nanoseconds
>> back to CPU specific cycles. Note that these must not be called in preemptible
>> context, as that would mean the kernel could enter software suspend state,
>> which would cause non-atomic operation.
>>
>> Also, convert the KVM_SET_CLOCK / KVM_GET_CLOCK ioctls to use the kernel
>> time helper, these should be bootbased as well.
>>
> This is one of the things I believe should be applied right now.
> Maybe we want a cut version of this patch, that exposes this API while
> adjusting KVM_SET_CLOCK / KVM_GET_CLOCK to get in early rather than late?
>

The first half of the series, at least, is good to go upstream and ready
for backport. The trapping and later stuff obviously needs to get some
upstream testing.