Greatly simplified version of the patches: allocate and deallocate
all required structs at module load and unload time.
Clarification: per_cpu(var, cpu) does indeed work for not-present CPUs;
the allocations for module-specific per-cpu variables are done at module
load and unload time, while the per-cpu chunks are set up for all possible
CPUs at boot time (a short illustrative sketch follows after this patch).
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/x86.c | 23 +++++++++++++++--------
1 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fedac9d..15d2ace 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3116,9 +3116,22 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
+static void kvm_timer_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+}
+
int kvm_arch_init(void *opaque)
{
- int r, cpu;
+ int r;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -3150,13 +3163,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
- for_each_possible_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- tsc_khz_ref = tsc_khz;
- cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- }
+ kvm_timer_init();
return 0;
--
1.6.4.4
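As a purely illustrative aside (not part of the patch above): a minimal sketch
of a module-scoped per-cpu variable, showing that per_cpu(var, cpu) can be
written for every possible CPU at module load time, as the clarification at
the top describes. The variable name example_khz and the module skeleton are
made up for this sketch.

#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* Module-scoped per-cpu storage; its backing space is bound at module load. */
static DEFINE_PER_CPU(unsigned long, example_khz);

static int __init example_init(void)
{
	int cpu;

	/* Safe even for CPUs that are possible but not yet present/online. */
	for_each_possible_cpu(cpu)
		per_cpu(example_khz, cpu) = 0;
	return 0;
}
module_init(example_init);

static void __exit example_exit(void)
{
}
module_exit(example_exit);

MODULE_LICENSE("GPL");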
They (ref_freq and tsc_khz_ref) are globals, not clearly protected by any
ordering or locking, and vulnerable to various startup races.
Instead, for variable TSC machines, register the cpufreq notifier and get
the TSC frequency directly from the cpufreq machinery. Not only is it
always right, it is also perfectly accurate, as no error-prone measurement
is required.
On such machines, when a new CPU is brought online, it isn't clear what
frequency it will start with, and it may not correspond to the reference;
thus, in hardware_enable we clear the cpu_tsc_khz variable to zero and make
sure it is set before a VCPU runs on that CPU.
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/x86.c | 30 ++++++++++++++++++++----------
1 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15d2ace..9cbd53a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1326,6 +1326,8 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
+ per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
kvm_request_guest_time_update(vcpu);
}
@@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
/* nothing */
}
-static unsigned int ref_freq;
-static unsigned long tsc_khz_ref;
-
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
- if (!ref_freq)
- ref_freq = freq->old;
-
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
- per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+ per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3120,12 +3116,18 @@ static void kvm_timer_init(void)
{
int cpu;
- for_each_possible_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- tsc_khz_ref = tsc_khz;
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
+ for_each_online_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
+ } else {
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ }
+ for_each_possible_cpu(cpu) {
+ printk(KERN_DEBUG "kvm: cpu %d = %ld khz\n",
+ cpu, per_cpu(cpu_tsc_khz, cpu));
}
}
@@ -4698,6 +4700,14 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
int kvm_arch_hardware_enable(void *garbage)
{
+ /*
+ * Since this may be called from a hotplug notification,
+ * we can't get the CPU frequency directly.
+ */
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ int cpu = raw_smp_processor_id();
+ per_cpu(cpu_tsc_khz, cpu) = 0;
+ }
return kvm_x86_ops->hardware_enable(garbage);
}
--
1.6.4.4
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/svm.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9a4daca..d1036ce 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -330,13 +330,14 @@ static int svm_hardware_enable(void *garbage)
return -EBUSY;
if (!has_svm()) {
- printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
+ printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+ me);
return -EINVAL;
}
svm_data = per_cpu(svm_data, me);
if (!svm_data) {
- printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+ printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
me);
return -EINVAL;
}
--
1.6.4.4
Both VMX and SVM require per-cpu memory allocation, which is done at module
init time for online CPUs only.
The backend was not allocating structures for all possible CPUs, so CPUs
coming online later could not be hardware-enabled.
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/svm.c | 4 ++--
arch/x86/kvm/vmx.c | 6 ++++--
virt/kvm/kvm_main.c | 3 ---
3 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1036ce..02a4269 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -482,7 +482,7 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME);
}
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
goto err;
@@ -516,7 +516,7 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3fe0d42..e86f1a6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1350,15 +1350,17 @@ static void free_kvm_area(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu) {
free_vmcs(per_cpu(vmxarea, cpu));
+ per_cpu(vmxarea, cpu) = NULL;
+ }
}
static __init int alloc_kvm_area(void)
{
int cpu;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct vmcs *vmcs;
vmcs = alloc_vmcs_cpu(cpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e27b7a9..2cd8bc2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1716,9 +1716,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
{
int cpu = (long)v;
- if (!kvm_usage_count)
- return NOTIFY_OK;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
--
1.6.4.4
On 09/29/2009 06:04 AM, Zachary Amsden wrote:
> They are globals, not clearly protected by any ordering or locking, and
> vulnerable to various startup races.
>
> Instead, for variable TSC machines, register the cpufreq notifier and get
> the TSC frequency directly from the cpufreq machinery. Not only is it
> always right, it is also perfectly accurate, as no error prone measurement
> is required.
>
> On such machines, when a new CPU online is brought online, it isn't clear what
> frequency it will start with, and it may not correspond to the reference, thus
> in hardware_enable we clear the cpu_tsc_khz variable to zero and make sure
> it is set before running on a VCPU.
>
> CPUFREQ_TRANSITION_NOTIFIER);
> + for_each_online_cpu(cpu)
> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
> + } else {
> + for_each_possible_cpu(cpu)
> + per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
> + }
> + for_each_possible_cpu(cpu) {
> + printk(KERN_DEBUG "kvm: cpu %d = %ld khz\n",
> + cpu, per_cpu(cpu_tsc_khz, cpu));
> }
> }
>
Leftover debug code?
--
error compiling committee.c: too many arguments to function
On 09/29/2009 06:04 AM, Zachary Amsden wrote:
> Both VMX and SVM require per-cpu memory allocation, which is done at module
> init time, for only online cpus.
>
> Backend was not allocating enough structure for all possible CPUs, so
> new CPUs coming online could not be hardware enabled.
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index e27b7a9..2cd8bc2 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1716,9 +1716,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
> {
> int cpu = (long)v;
>
> - if (!kvm_usage_count)
> - return NOTIFY_OK;
> -
> val&= ~CPU_TASKS_FROZEN;
> switch (val) {
> case CPU_DYING:
>
I still don't see how this bit can work. Maybe if we move the
notification registration to the point where kvm_usage_count is bumped.
--
error compiling committee.c: too many arguments to function
On 09/28/2009 10:30 PM, Avi Kivity wrote:
> On 09/29/2009 06:04 AM, Zachary Amsden wrote:
>> Both VMX and SVM require per-cpu memory allocation, which is done at
>> module
>> init time, for only online cpus.
>>
>> Backend was not allocating enough structure for all possible CPUs, so
>> new CPUs coming online could not be hardware enabled.
>>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index e27b7a9..2cd8bc2 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -1716,9 +1716,6 @@ static int kvm_cpu_hotplug(struct
>> notifier_block *notifier, unsigned long val,
>> {
>> int cpu = (long)v;
>>
>> - if (!kvm_usage_count)
>> - return NOTIFY_OK;
>> -
>> val&= ~CPU_TASKS_FROZEN;
>> switch (val) {
>> case CPU_DYING:
>
> I still don't see how this bit can work. Maybe if we move the
> notification registration to the point where kvm_usage_count is bumped.
That was stray junk in the patch. Let me rediff...
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/x86.c | 23 +++++++++++++++--------
1 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fedac9d..15d2ace 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3116,9 +3116,22 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
+static void kvm_timer_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+}
+
int kvm_arch_init(void *opaque)
{
- int r, cpu;
+ int r;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -3150,13 +3163,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
- for_each_possible_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- tsc_khz_ref = tsc_khz;
- cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- }
+ kvm_timer_init();
return 0;
--
1.6.4.4
They (ref_freq and tsc_khz_ref) are globals, not clearly protected by any
ordering or locking, and vulnerable to various startup races.
Instead, for variable TSC machines, register the cpufreq notifier and get
the TSC frequency directly from the cpufreq machinery. Not only is it
always right, it is also perfectly accurate, as no error-prone measurement
is required.
On such machines, when a new CPU is brought online, it isn't clear what
frequency it will start with, and it may not correspond to the reference;
thus, in hardware_enable we clear the cpu_tsc_khz variable to zero and make
sure it is set before a VCPU runs on that CPU.
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
1 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15d2ace..de4ce8f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1326,6 +1326,8 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
+ per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
kvm_request_guest_time_update(vcpu);
}
@@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
/* nothing */
}
-static unsigned int ref_freq;
-static unsigned long tsc_khz_ref;
-
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
- if (!ref_freq)
- ref_freq = freq->old;
-
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
- per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+ per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
{
int cpu;
- for_each_possible_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- tsc_khz_ref = tsc_khz;
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
+ for_each_online_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
+ } else {
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
}
}
@@ -4698,6 +4696,14 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
int kvm_arch_hardware_enable(void *garbage)
{
+ /*
+ * Since this may be called from a hotplug notification,
+ * we can't get the CPU frequency directly.
+ */
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ int cpu = raw_smp_processor_id();
+ per_cpu(cpu_tsc_khz, cpu) = 0;
+ }
return kvm_x86_ops->hardware_enable(garbage);
}
--
1.6.4.4
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/svm.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9a4daca..d1036ce 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -330,13 +330,14 @@ static int svm_hardware_enable(void *garbage)
return -EBUSY;
if (!has_svm()) {
- printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
+ printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+ me);
return -EINVAL;
}
svm_data = per_cpu(svm_data, me);
if (!svm_data) {
- printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+ printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
me);
return -EINVAL;
}
--
1.6.4.4
Both VMX and SVM require per-cpu memory allocation, which is done at module
init time for online CPUs only.
The backend was not allocating structures for all possible CPUs, so CPUs
coming online later could not be hardware-enabled.
Signed-off-by: Zachary Amsden <[email protected]>
---
arch/x86/kvm/svm.c | 4 ++--
arch/x86/kvm/vmx.c | 6 ++++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1036ce..02a4269 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -482,7 +482,7 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME);
}
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
goto err;
@@ -516,7 +516,7 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3fe0d42..e86f1a6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1350,15 +1350,17 @@ static void free_kvm_area(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu) {
free_vmcs(per_cpu(vmxarea, cpu));
+ per_cpu(vmxarea, cpu) = NULL;
+ }
}
static __init int alloc_kvm_area(void)
{
int cpu;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct vmcs *vmcs;
vmcs = alloc_vmcs_cpu(cpu);
--
1.6.4.4
On 09/29/2009 11:38 PM, Zachary Amsden wrote:
> Signed-off-by: Zachary Amsden<[email protected]>
>
Looks good.
Is anything preventing us from unifying the constant_tsc and !constant_tsc
paths? We could just do a quick check in the notifier, see that the TSC
frequency hasn't changed, and return. (A rough sketch of that idea follows
below.)
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
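For illustration only, a hedged sketch of one literal-minded way to fold the
two kvm_timer_init paths into the notifier path, using the constant_tsc
feature flag as the "TSC frequency hasn't changed" check. This is not from
the patches above, and the replies below explain why the special case stays:
cpufreq reports the processor frequency, not the TSC frequency.

static int kvmclock_cpufreq_notifier(struct notifier_block *nb,
				     unsigned long val, void *data)
{
	struct cpufreq_freqs *freq = data;

	/* On constant_tsc parts the TSC does not follow the core clock. */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
		return 0;
	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
		return 0;

	per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;

	/* ... kick the affected VCPUs exactly as in the patch above ... */
	return 0;
}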
On Tue, Sep 29, 2009 at 11:38:37AM -1000, Zachary Amsden wrote:
> Both VMX and SVM require per-cpu memory allocation, which is done at module
> init time, for only online cpus.
>
> Backend was not allocating enough structure for all possible CPUs, so
> new CPUs coming online could not be hardware enabled.
>
> Signed-off-by: Zachary Amsden <[email protected]>
Applied all, thanks.
On 09/29/2009 10:45 PM, Avi Kivity wrote:
> On 09/29/2009 11:38 PM, Zachary Amsden wrote:
>> Signed-off-by: Zachary Amsden<[email protected]>
>
>
> Looks good.
>
> Is anything preventing us from unifying the constant_tsc and !same
> paths? We could just do a quick check in the notifier, see the tsc
> frequency hasn't changed, and return.
Actually, yes. On constant_tsc processors, the processor frequency may
still change; however, the TSC frequency does not change with it.
I actually have both kinds of processors (frequency changes with
constant TSC, and frequency changes with variable TSC), so I was able to
test both cases.
Zach
On 09/30/2009 05:51 PM, Zachary Amsden wrote:
>> Is anything preventing us from unifying the constant_tsc and !same
>> paths? We could just do a quick check in the notifier, see the tsc
>> frequency hasn't changed, and return.
>
>
> Actually, yes. On constant_tsc processors, the processor frequency
> may still change, however the TSC frequency does not change with it.
>
> I actually have both of these kinds of processors (freq changes with
> constant TSC and freq changes with variable TSC) so I was able to test
> both of these cases.
If the API allows us to query the tsc frequency, it would simply return
the same values in all cases, which we'd ignore.
--
error compiling committee.c: too many arguments to function
On 09/30/2009 05:56 AM, Avi Kivity wrote:
> On 09/30/2009 05:51 PM, Zachary Amsden wrote:
>
> If the API allows us to query the tsc frequency, it would simply
> return the same values in all cases, which we'd ignore.
The API only allows querying the processor frequency. In the
constant_tsc case, the highest processor frequency is likely going to be
the actual TSC frequency, but I don't think it's a guarantee;
theoretically, it could be faster on normal hardware ... or slower on
overclocked hardware with an externally clocked TSC.
Zach
On 09/30/2009 06:06 PM, Zachary Amsden wrote:
> On 09/30/2009 05:56 AM, Avi Kivity wrote:
>> On 09/30/2009 05:51 PM, Zachary Amsden wrote:
>>
>> If the API allows us to query the tsc frequency, it would simply
>> return the same values in all cases, which we'd ignore.
>
> The API only allows querying the processor frequency. In the
> constant_tsc case, the highest processor frequency is likely going to
> be the actual TSC frequency, but I don't think it's a guarantee;
> theoretically, it could be faster on normal hardware ... or slower on
> overclocked hardware with an externally clocked TSC.
Well we could add a new API then (or a new tscfreq notifier). Those
conditionals don't belong in client code.
--
error compiling committee.c: too many arguments to function
On 09/30/2009 06:11 AM, Avi Kivity wrote:
> On 09/30/2009 06:06 PM, Zachary Amsden wrote:
>> On 09/30/2009 05:56 AM, Avi Kivity wrote:
>>> On 09/30/2009 05:51 PM, Zachary Amsden wrote:
>>>
>>> If the API allows us to query the tsc frequency, it would simply
>>> return the same values in all cases, which we'd ignore.
>>
>> The API only allows querying the processor frequency. In the
>> constant_tsc case, the highest processor frequency is likely going to
>> be the actual TSC frequency, but I don't think it's a guarantee;
>> theoretically, it could be faster on normal hardware ... or slower on
>> overclocked hardware with an externally clocked TSC.
>
> Well we could add a new API then (or a new tscfreq notifier). Those
> conditionals don't belong in client code.
It's possible... but it's also possible to run without cpufreq enabled,
which won't work properly unless the cpufreq code is aware of the
measured tsc_khz... this could be a little ugly architecture-wise, given
the big melting pot of generic code and vendor/arch-specific code here.
Since we're already very hardware-dependent and one of the few clients
who care, it seems okay to leave it as-is for now.
Zach
Zachary Amsden wrote:
> They are globals, not clearly protected by any ordering or locking, and
> vulnerable to various startup races.
>
> Instead, for variable TSC machines, register the cpufreq notifier and get
> the TSC frequency directly from the cpufreq machinery. Not only is it
> always right, it is also perfectly accurate, as no error prone measurement
> is required.
>
> On such machines, when a new CPU online is brought online, it isn't clear what
> frequency it will start with, and it may not correspond to the reference, thus
> in hardware_enable we clear the cpu_tsc_khz variable to zero and make sure
> it is set before running on a VCPU.
>
> Signed-off-by: Zachary Amsden <[email protected]>
> ---
> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
> 1 files changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 15d2ace..de4ce8f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1326,6 +1326,8 @@ out:
> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> kvm_x86_ops->vcpu_load(vcpu, cpu);
> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
> kvm_request_guest_time_update(vcpu);
> }
>
> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
> /* nothing */
> }
>
> -static unsigned int ref_freq;
> -static unsigned long tsc_khz_ref;
> -
> static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
> void *data)
> {
> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
> struct kvm_vcpu *vcpu;
> int i, send_ipi = 0;
>
> - if (!ref_freq)
> - ref_freq = freq->old;
> -
> if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
> return 0;
> if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
> return 0;
> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>
> spin_lock(&kvm_lock);
> list_for_each_entry(kvm, &vm_list, vm_list) {
> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
> {
> int cpu;
>
> - for_each_possible_cpu(cpu)
> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
> - tsc_khz_ref = tsc_khz;
> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
> CPUFREQ_TRANSITION_NOTIFIER);
> + for_each_online_cpu(cpu)
> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
This doesn't build for !CONFIG_CPU_FREQ.
Jan
On 10/08/2009 01:18 PM, Jan Kiszka wrote:
> Zachary Amsden wrote:
>
>> They are globals, not clearly protected by any ordering or locking, and
>> vulnerable to various startup races.
>>
>> Instead, for variable TSC machines, register the cpufreq notifier and get
>> the TSC frequency directly from the cpufreq machinery. Not only is it
>> always right, it is also perfectly accurate, as no error prone measurement
>> is required.
>>
>> On such machines, when a new CPU online is brought online, it isn't clear what
>> frequency it will start with, and it may not correspond to the reference, thus
>> in hardware_enable we clear the cpu_tsc_khz variable to zero and make sure
>> it is set before running on a VCPU.
>>
>> Signed-off-by: Zachary Amsden<[email protected]>
>> ---
>> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
>> 1 files changed, 16 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 15d2ace..de4ce8f 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1326,6 +1326,8 @@ out:
>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>> {
>> kvm_x86_ops->vcpu_load(vcpu, cpu);
>> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
>> kvm_request_guest_time_update(vcpu);
>> }
>>
>> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
>> /* nothing */
>> }
>>
>> -static unsigned int ref_freq;
>> -static unsigned long tsc_khz_ref;
>> -
>> static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
>> void *data)
>> {
>> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
>> struct kvm_vcpu *vcpu;
>> int i, send_ipi = 0;
>>
>> - if (!ref_freq)
>> - ref_freq = freq->old;
>> -
>> if (val == CPUFREQ_PRECHANGE&& freq->old> freq->new)
>> return 0;
>> if (val == CPUFREQ_POSTCHANGE&& freq->old< freq->new)
>> return 0;
>> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
>> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>>
>> spin_lock(&kvm_lock);
>> list_for_each_entry(kvm,&vm_list, vm_list) {
>> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
>> {
>> int cpu;
>>
>> - for_each_possible_cpu(cpu)
>> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
>> - tsc_khz_ref = tsc_khz;
>> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
>> CPUFREQ_TRANSITION_NOTIFIER);
>> + for_each_online_cpu(cpu)
>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
>>
> This doesn't build for !CONFIG_CPU_FREQ.
>
And did it before?
Zachary Amsden wrote:
> On 10/08/2009 01:18 PM, Jan Kiszka wrote:
>> Zachary Amsden wrote:
>>
>>> They are globals, not clearly protected by any ordering or locking, and
>>> vulnerable to various startup races.
>>>
>>> Instead, for variable TSC machines, register the cpufreq notifier and
>>> get
>>> the TSC frequency directly from the cpufreq machinery. Not only is it
>>> always right, it is also perfectly accurate, as no error prone
>>> measurement
>>> is required.
>>>
>>> On such machines, when a new CPU online is brought online, it isn't
>>> clear what
>>> frequency it will start with, and it may not correspond to the
>>> reference, thus
>>> in hardware_enable we clear the cpu_tsc_khz variable to zero and make
>>> sure
>>> it is set before running on a VCPU.
>>>
>>> Signed-off-by: Zachary Amsden<[email protected]>
>>> ---
>>> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
>>> 1 files changed, 16 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 15d2ace..de4ce8f 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -1326,6 +1326,8 @@ out:
>>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>> {
>>> kvm_x86_ops->vcpu_load(vcpu, cpu);
>>> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
>>> kvm_request_guest_time_update(vcpu);
>>> }
>>>
>>> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
>>> /* nothing */
>>> }
>>>
>>> -static unsigned int ref_freq;
>>> -static unsigned long tsc_khz_ref;
>>> -
>>> static int kvmclock_cpufreq_notifier(struct notifier_block *nb,
>>> unsigned long val,
>>> void *data)
>>> {
>>> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct
>>> notifier_block *nb, unsigned long va
>>> struct kvm_vcpu *vcpu;
>>> int i, send_ipi = 0;
>>>
>>> - if (!ref_freq)
>>> - ref_freq = freq->old;
>>> -
>>> if (val == CPUFREQ_PRECHANGE&& freq->old> freq->new)
>>> return 0;
>>> if (val == CPUFREQ_POSTCHANGE&& freq->old< freq->new)
>>> return 0;
>>> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref,
>>> ref_freq, freq->new);
>>> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>>>
>>> spin_lock(&kvm_lock);
>>> list_for_each_entry(kvm,&vm_list, vm_list) {
>>> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
>>> {
>>> int cpu;
>>>
>>> - for_each_possible_cpu(cpu)
>>> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>>> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
>>> - tsc_khz_ref = tsc_khz;
>>> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
>>> CPUFREQ_TRANSITION_NOTIFIER);
>>> + for_each_online_cpu(cpu)
>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
>>>
>> This doesn't build for !CONFIG_CPU_FREQ.
>>
>
> And did it before?
Yes, it did: the call to cpufreq_get, which is undefined without
CONFIG_CPU_FREQ, did not exist until now. One may argue that this is a
deficit of the cpufreq API. However, it needs fixing. (A sketch of such a
stub follows below.)
Jan
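For reference, a hedged sketch of the kind of cpufreq API fix being
discussed: giving cpufreq_get() a !CONFIG_CPU_FREQ stub so callers don't
need an #ifdef. The placement (include/linux/cpufreq.h) is an assumption
here; the kvm-kmod compat variant Jan queued appears at the end of the
thread.

/* Sketch of a !CONFIG_CPU_FREQ fallback, e.g. in include/linux/cpufreq.h. */
#ifndef CONFIG_CPU_FREQ
static inline unsigned int cpufreq_get(unsigned int cpu)
{
	return 0;	/* frequency unknown; callers must cope with 0 */
}
#endif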
On 10/09/2009 10:36 AM, Jan Kiszka wrote:
> Zachary Amsden wrote:
>
>> On 10/08/2009 01:18 PM, Jan Kiszka wrote:
>>
>>> Zachary Amsden wrote:
>>>
>>>
>>>> They are globals, not clearly protected by any ordering or locking, and
>>>> vulnerable to various startup races.
>>>>
>>>> Instead, for variable TSC machines, register the cpufreq notifier and
>>>> get
>>>> the TSC frequency directly from the cpufreq machinery. Not only is it
>>>> always right, it is also perfectly accurate, as no error prone
>>>> measurement
>>>> is required.
>>>>
>>>> On such machines, when a new CPU online is brought online, it isn't
>>>> clear what
>>>> frequency it will start with, and it may not correspond to the
>>>> reference, thus
>>>> in hardware_enable we clear the cpu_tsc_khz variable to zero and make
>>>> sure
>>>> it is set before running on a VCPU.
>>>>
>>>> Signed-off-by: Zachary Amsden<[email protected]>
>>>> ---
>>>> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
>>>> 1 files changed, 16 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 15d2ace..de4ce8f 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -1326,6 +1326,8 @@ out:
>>>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>>> {
>>>> kvm_x86_ops->vcpu_load(vcpu, cpu);
>>>> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
>>>> kvm_request_guest_time_update(vcpu);
>>>> }
>>>>
>>>> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
>>>> /* nothing */
>>>> }
>>>>
>>>> -static unsigned int ref_freq;
>>>> -static unsigned long tsc_khz_ref;
>>>> -
>>>> static int kvmclock_cpufreq_notifier(struct notifier_block *nb,
>>>> unsigned long val,
>>>> void *data)
>>>> {
>>>> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct
>>>> notifier_block *nb, unsigned long va
>>>> struct kvm_vcpu *vcpu;
>>>> int i, send_ipi = 0;
>>>>
>>>> - if (!ref_freq)
>>>> - ref_freq = freq->old;
>>>> -
>>>> if (val == CPUFREQ_PRECHANGE&& freq->old> freq->new)
>>>> return 0;
>>>> if (val == CPUFREQ_POSTCHANGE&& freq->old< freq->new)
>>>> return 0;
>>>> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref,
>>>> ref_freq, freq->new);
>>>> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>>>>
>>>> spin_lock(&kvm_lock);
>>>> list_for_each_entry(kvm,&vm_list, vm_list) {
>>>> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
>>>> {
>>>> int cpu;
>>>>
>>>> - for_each_possible_cpu(cpu)
>>>> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>>>> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
>>>> - tsc_khz_ref = tsc_khz;
>>>> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
>>>> CPUFREQ_TRANSITION_NOTIFIER);
>>>> + for_each_online_cpu(cpu)
>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
>>>>
>>>>
>>> This doesn't build for !CONFIG_CPU_FREQ.
>>>
>>>
>> And did it before?
>>
> Yes, because cpufreq_get, which is undefined without CONFIG_CPU_FREQ,
> did not exist so far. One may argue that this is a deficit of the
> cpufreq API. However, it needs fixing.
>
I'll send a patch today. I'm going with 'API deficiency' and will fix
that, but this means I will also need a fix for kvm-kmod :(
Zach
On 10/09/2009 10:47 AM, Jan Kiszka wrote:
> Zachary Amsden wrote:
>
>> On 10/09/2009 10:36 AM, Jan Kiszka wrote:
>>
>>> Zachary Amsden wrote:
>>>
>>>
>>>> On 10/08/2009 01:18 PM, Jan Kiszka wrote:
>>>>
>>>>
>>>>> Zachary Amsden wrote:
>>>>>
>>>>>
>>>>>
>>>>>> They are globals, not clearly protected by any ordering or locking,
>>>>>> and
>>>>>> vulnerable to various startup races.
>>>>>>
>>>>>> Instead, for variable TSC machines, register the cpufreq notifier and
>>>>>> get
>>>>>> the TSC frequency directly from the cpufreq machinery. Not only is it
>>>>>> always right, it is also perfectly accurate, as no error prone
>>>>>> measurement
>>>>>> is required.
>>>>>>
>>>>>> On such machines, when a new CPU online is brought online, it isn't
>>>>>> clear what
>>>>>> frequency it will start with, and it may not correspond to the
>>>>>> reference, thus
>>>>>> in hardware_enable we clear the cpu_tsc_khz variable to zero and make
>>>>>> sure
>>>>>> it is set before running on a VCPU.
>>>>>>
>>>>>> Signed-off-by: Zachary Amsden<[email protected]>
>>>>>> ---
>>>>>> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
>>>>>> 1 files changed, 16 insertions(+), 10 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>> index 15d2ace..de4ce8f 100644
>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>> @@ -1326,6 +1326,8 @@ out:
>>>>>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>>>>> {
>>>>>> kvm_x86_ops->vcpu_load(vcpu, cpu);
>>>>>> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
>>>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
>>>>>> kvm_request_guest_time_update(vcpu);
>>>>>> }
>>>>>>
>>>>>> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
>>>>>> /* nothing */
>>>>>> }
>>>>>>
>>>>>> -static unsigned int ref_freq;
>>>>>> -static unsigned long tsc_khz_ref;
>>>>>> -
>>>>>> static int kvmclock_cpufreq_notifier(struct notifier_block *nb,
>>>>>> unsigned long val,
>>>>>> void *data)
>>>>>> {
>>>>>> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct
>>>>>> notifier_block *nb, unsigned long va
>>>>>> struct kvm_vcpu *vcpu;
>>>>>> int i, send_ipi = 0;
>>>>>>
>>>>>> - if (!ref_freq)
>>>>>> - ref_freq = freq->old;
>>>>>> -
>>>>>> if (val == CPUFREQ_PRECHANGE&& freq->old> freq->new)
>>>>>> return 0;
>>>>>> if (val == CPUFREQ_POSTCHANGE&& freq->old< freq->new)
>>>>>> return 0;
>>>>>> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref,
>>>>>> ref_freq, freq->new);
>>>>>> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>>>>>>
>>>>>> spin_lock(&kvm_lock);
>>>>>> list_for_each_entry(kvm,&vm_list, vm_list) {
>>>>>> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
>>>>>> {
>>>>>> int cpu;
>>>>>>
>>>>>> - for_each_possible_cpu(cpu)
>>>>>> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>>>>>> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
>>>>>> - tsc_khz_ref = tsc_khz;
>>>>>> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
>>>>>> CPUFREQ_TRANSITION_NOTIFIER);
>>>>>> + for_each_online_cpu(cpu)
>>>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
>>>>>>
>>>>>>
>>>>>>
>>>>> This doesn't build for !CONFIG_CPU_FREQ.
>>>>>
>>>>>
>>>>>
>>>> And did it before?
>>>>
>>>>
>>> Yes, because cpufreq_get, which is undefined without CONFIG_CPU_FREQ,
>>> did not exist so far. One may argue that this is a deficit of the
>>> cpufreq API. However, it needs fixing.
>>>
>>>
>> I'll send a patch today. I'm going with API deficient and will fix
>> that, but this means I will also require a fix for kvm-kmod :(
>>
> That won't be tricky, I will queue up a fix.
>
> BTW, is the KVM prepared for cpufreq_get returning 0?
>
That will be part of the fix, I think.
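Illustrative only, and not necessarily the fix that was eventually sent: one
way kvm_arch_vcpu_load could cope with cpufreq_quick_get() returning 0 is to
fall back to the boot-measured tsc_khz.

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	kvm_x86_ops->vcpu_load(vcpu, cpu);
	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
		unsigned int khz = cpufreq_quick_get(cpu);

		if (!khz)
			khz = tsc_khz;	/* cpufreq couldn't tell us; use the boot value */
		per_cpu(cpu_tsc_khz, cpu) = khz;
	}
	kvm_request_guest_time_update(vcpu);
}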
Zachary Amsden wrote:
> On 10/09/2009 10:36 AM, Jan Kiszka wrote:
>> Zachary Amsden wrote:
>>
>>> On 10/08/2009 01:18 PM, Jan Kiszka wrote:
>>>
>>>> Zachary Amsden wrote:
>>>>
>>>>
>>>>> They are globals, not clearly protected by any ordering or locking,
>>>>> and
>>>>> vulnerable to various startup races.
>>>>>
>>>>> Instead, for variable TSC machines, register the cpufreq notifier and
>>>>> get
>>>>> the TSC frequency directly from the cpufreq machinery. Not only is it
>>>>> always right, it is also perfectly accurate, as no error prone
>>>>> measurement
>>>>> is required.
>>>>>
>>>>> On such machines, when a new CPU online is brought online, it isn't
>>>>> clear what
>>>>> frequency it will start with, and it may not correspond to the
>>>>> reference, thus
>>>>> in hardware_enable we clear the cpu_tsc_khz variable to zero and make
>>>>> sure
>>>>> it is set before running on a VCPU.
>>>>>
>>>>> Signed-off-by: Zachary Amsden<[email protected]>
>>>>> ---
>>>>> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
>>>>> 1 files changed, 16 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>> index 15d2ace..de4ce8f 100644
>>>>> --- a/arch/x86/kvm/x86.c
>>>>> +++ b/arch/x86/kvm/x86.c
>>>>> @@ -1326,6 +1326,8 @@ out:
>>>>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>>>> {
>>>>> kvm_x86_ops->vcpu_load(vcpu, cpu);
>>>>> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
>>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
>>>>> kvm_request_guest_time_update(vcpu);
>>>>> }
>>>>>
>>>>> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
>>>>> /* nothing */
>>>>> }
>>>>>
>>>>> -static unsigned int ref_freq;
>>>>> -static unsigned long tsc_khz_ref;
>>>>> -
>>>>> static int kvmclock_cpufreq_notifier(struct notifier_block *nb,
>>>>> unsigned long val,
>>>>> void *data)
>>>>> {
>>>>> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct
>>>>> notifier_block *nb, unsigned long va
>>>>> struct kvm_vcpu *vcpu;
>>>>> int i, send_ipi = 0;
>>>>>
>>>>> - if (!ref_freq)
>>>>> - ref_freq = freq->old;
>>>>> -
>>>>> if (val == CPUFREQ_PRECHANGE&& freq->old> freq->new)
>>>>> return 0;
>>>>> if (val == CPUFREQ_POSTCHANGE&& freq->old< freq->new)
>>>>> return 0;
>>>>> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref,
>>>>> ref_freq, freq->new);
>>>>> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>>>>>
>>>>> spin_lock(&kvm_lock);
>>>>> list_for_each_entry(kvm,&vm_list, vm_list) {
>>>>> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
>>>>> {
>>>>> int cpu;
>>>>>
>>>>> - for_each_possible_cpu(cpu)
>>>>> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>>>>> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
>>>>> - tsc_khz_ref = tsc_khz;
>>>>> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
>>>>> CPUFREQ_TRANSITION_NOTIFIER);
>>>>> + for_each_online_cpu(cpu)
>>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
>>>>>
>>>>>
>>>> This doesn't build for !CONFIG_CPU_FREQ.
>>>>
>>>>
>>> And did it before?
>>>
>> Yes, because cpufreq_get, which is undefined without CONFIG_CPU_FREQ,
>> did not exist so far. One may argue that this is a deficit of the
>> cpufreq API. However, it needs fixing.
>>
>
> I'll send a patch today. I'm going with API deficient and will fix
> that, but this means I will also require a fix for kvm-kmod :(
That won't be tricky, I will queue up a fix.
BTW, is KVM prepared for cpufreq_get returning 0?
Jan
Zachary Amsden wrote:
> On 10/09/2009 10:47 AM, Jan Kiszka wrote:
>> Zachary Amsden wrote:
>>
>>> On 10/09/2009 10:36 AM, Jan Kiszka wrote:
>>>
>>>> Zachary Amsden wrote:
>>>>
>>>>
>>>>> On 10/08/2009 01:18 PM, Jan Kiszka wrote:
>>>>>
>>>>>
>>>>>> Zachary Amsden wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>>> They are globals, not clearly protected by any ordering or locking,
>>>>>>> and
>>>>>>> vulnerable to various startup races.
>>>>>>>
>>>>>>> Instead, for variable TSC machines, register the cpufreq notifier
>>>>>>> and
>>>>>>> get
>>>>>>> the TSC frequency directly from the cpufreq machinery. Not only
>>>>>>> is it
>>>>>>> always right, it is also perfectly accurate, as no error prone
>>>>>>> measurement
>>>>>>> is required.
>>>>>>>
>>>>>>> On such machines, when a new CPU online is brought online, it isn't
>>>>>>> clear what
>>>>>>> frequency it will start with, and it may not correspond to the
>>>>>>> reference, thus
>>>>>>> in hardware_enable we clear the cpu_tsc_khz variable to zero and
>>>>>>> make
>>>>>>> sure
>>>>>>> it is set before running on a VCPU.
>>>>>>>
>>>>>>> Signed-off-by: Zachary Amsden<[email protected]>
>>>>>>> ---
>>>>>>> arch/x86/kvm/x86.c | 26 ++++++++++++++++----------
>>>>>>> 1 files changed, 16 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>>>>> index 15d2ace..de4ce8f 100644
>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>> @@ -1326,6 +1326,8 @@ out:
>>>>>>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>>>>>> {
>>>>>>> kvm_x86_ops->vcpu_load(vcpu, cpu);
>>>>>>> + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
>>>>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
>>>>>>> kvm_request_guest_time_update(vcpu);
>>>>>>> }
>>>>>>>
>>>>>>> @@ -3061,9 +3063,6 @@ static void bounce_off(void *info)
>>>>>>> /* nothing */
>>>>>>> }
>>>>>>>
>>>>>>> -static unsigned int ref_freq;
>>>>>>> -static unsigned long tsc_khz_ref;
>>>>>>> -
>>>>>>> static int kvmclock_cpufreq_notifier(struct notifier_block *nb,
>>>>>>> unsigned long val,
>>>>>>> void *data)
>>>>>>> {
>>>>>>> @@ -3072,14 +3071,11 @@ static int kvmclock_cpufreq_notifier(struct
>>>>>>> notifier_block *nb, unsigned long va
>>>>>>> struct kvm_vcpu *vcpu;
>>>>>>> int i, send_ipi = 0;
>>>>>>>
>>>>>>> - if (!ref_freq)
>>>>>>> - ref_freq = freq->old;
>>>>>>> -
>>>>>>> if (val == CPUFREQ_PRECHANGE&& freq->old> freq->new)
>>>>>>> return 0;
>>>>>>> if (val == CPUFREQ_POSTCHANGE&& freq->old< freq->new)
>>>>>>> return 0;
>>>>>>> - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref,
>>>>>>> ref_freq, freq->new);
>>>>>>> + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
>>>>>>>
>>>>>>> spin_lock(&kvm_lock);
>>>>>>> list_for_each_entry(kvm,&vm_list, vm_list) {
>>>>>>> @@ -3120,12 +3116,14 @@ static void kvm_timer_init(void)
>>>>>>> {
>>>>>>> int cpu;
>>>>>>>
>>>>>>> - for_each_possible_cpu(cpu)
>>>>>>> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>>>>>>> if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
>>>>>>> - tsc_khz_ref = tsc_khz;
>>>>>>>
>>>>>>> cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
>>>>>>> CPUFREQ_TRANSITION_NOTIFIER);
>>>>>>> + for_each_online_cpu(cpu)
>>>>>>> + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>> This doesn't build for !CONFIG_CPU_FREQ.
>>>>>>
>>>>>>
>>>>>>
>>>>> And did it before?
>>>>>
>>>>>
>>>> Yes, because cpufreq_get, which is undefined without CONFIG_CPU_FREQ,
>>>> did not exist so far. One may argue that this is a deficit of the
>>>> cpufreq API. However, it needs fixing.
>>>>
>>>>
>>> I'll send a patch today. I'm going with API deficient and will fix
>>> that, but this means I will also require a fix for kvm-kmod :(
>>>
>> That won't be tricky, I will queue up a fix.
>>
>> BTW, is the KVM prepared for cpufreq_get returning 0?
>>
>
> That will be part of the fix, I think.
>
That's good, specifically as it looks like cpufreq_get can return 0 even
in the CONFIG_CPU_FREQ case, at least theoretically.
Jan
PS:
diff --git a/external-module-compat-comm.h b/external-module-compat-comm.h
index 47fdc86..de8ab23 100644
--- a/external-module-compat-comm.h
+++ b/external-module-compat-comm.h
@@ -993,3 +993,10 @@ unsigned long kvm_vma_kernel_pagesize(struct vm_area_struct *vma)
} \
})
#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && !defined(CONFIG_CPU_FREQ)
+static inline unsigned int cpufreq_get(unsigned int cpu)
+{
+ return 0;
+}
+#endif