2007-06-01 06:12:16

by Avi Kivity

[permalink] [raw]
Subject: [GIT PULL][RESEND] KVM cpu hotplug fixes

Linus,

Please pull from the repository and branch

git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git hotplug-linus

to receive a patchset which makes cpu hotplug (and therefore, suspend
and resume) more robust when running virtual machines. The core issue
is that we need a notification about a cpu going away at a point in time
where it's still alive, but not running any user processes. Such a
notification does not exist today, so the patchset adds it as a new
CPU_DYING notification.

The patchset is against 2.6.22-rc3.

Shortlog:

Avi Kivity (7):
HOTPLUG: Add CPU_DYING notifier
HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING
HOTPLUG: Adapt thermal throttle to CPU_DYING
SMP: Implement on_cpu()
KVM: Keep track of which cpus have virtualization enabled
KVM: Tune hotplug/suspend IPIs
KVM: Use CPU_DYING for disabling virtualization

arch/i386/kernel/cpu/mcheck/therm_throt.c | 6 ++-
drivers/kvm/kvm_main.c | 50 +++++++++++++++++++++--------
include/linux/notifier.h | 3 ++
include/linux/smp.h | 16 +++++++++
kernel/cpu.c | 16 ++++++++-
kernel/cpuset.c | 3 ++
kernel/softirq.c | 24 ++++++++++++++
7 files changed, 100 insertions(+), 18 deletions(-)

And the patch in all its glory:

diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c
b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 7ba7c3a..1203dc5 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -134,19 +134,21 @@ static __cpuinit int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
int err;

sys_dev = get_cpu_sysdev(cpu);
- mutex_lock(&therm_cpu_lock);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
+ mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev);
+ mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ mutex_lock(&therm_cpu_lock);
thermal_throttle_remove_dev(sys_dev);
+ mutex_unlock(&therm_cpu_lock);
break;
}
- mutex_unlock(&therm_cpu_lock);
return NOTIFY_OK;
}

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index da985b3..1ad5ea1 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/sched.h>
+#include <linux/cpumask.h>

#include "x86_emulate.h"
#include "segment_descriptor.h"
@@ -51,8 +52,12 @@ MODULE_LICENSE("GPL");
static DEFINE_SPINLOCK(kvm_lock);
static LIST_HEAD(vm_list);

+static cpumask_t cpus_hardware_enabled;
+
struct kvm_arch_ops *kvm_arch_ops;

+static void hardware_disable(void *ignored);
+
#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)

static struct kvm_stats_debugfs_item {
@@ -2840,7 +2845,7 @@ static int kvm_reboot(struct notifier_block
*notifier, unsigned long val,
* in vmx root mode.
*/
printk(KERN_INFO "kvm: exiting hardware virtualization\n");
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ on_each_cpu(hardware_disable, NULL, 0, 1);
}
return NOTIFY_OK;
}
@@ -2883,28 +2888,46 @@ static void decache_vcpus_on_cpu(int cpu)
spin_unlock(&kvm_lock);
}

+static void hardware_enable(void *junk)
+{
+ int cpu = raw_smp_processor_id();
+
+ if (cpu_isset(cpu, cpus_hardware_enabled))
+ return;
+ cpu_set(cpu, cpus_hardware_enabled);
+ kvm_arch_ops->hardware_enable(NULL);
+}
+
+static void hardware_disable(void *junk)
+{
+ int cpu = raw_smp_processor_id();
+
+ if (!cpu_isset(cpu, cpus_hardware_enabled))
+ return;
+ cpu_clear(cpu, cpus_hardware_enabled);
+ decache_vcpus_on_cpu(cpu);
+ kvm_arch_ops->hardware_disable(NULL);
+}
+
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned
long val,
void *v)
{
int cpu = (long)v;

switch (val) {
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
- decache_vcpus_on_cpu(cpu);
- smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
- NULL, 0, 1);
+ on_cpu(cpu, hardware_disable, NULL, 0, 1);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
- NULL, 0, 1);
+ on_cpu(cpu, hardware_enable, NULL, 0, 1);
break;
}
return NOTIFY_OK;
@@ -2961,14 +2984,13 @@ static void kvm_exit_debug(void)

static int kvm_suspend(struct sys_device *dev, pm_message_t state)
{
- decache_vcpus_on_cpu(raw_smp_processor_id());
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ hardware_disable(NULL);
return 0;
}

static int kvm_resume(struct sys_device *dev)
{
- on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+ hardware_enable(NULL);
return 0;
}

@@ -3021,7 +3043,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct
module *module)
if (r < 0)
goto out;

- on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+ on_each_cpu(hardware_enable, NULL, 0, 1);
r = register_cpu_notifier(&kvm_cpu_notifier);
if (r)
goto out_free_1;
@@ -3053,7 +3075,7 @@ out_free_2:
unregister_reboot_notifier(&kvm_reboot_notifier);
unregister_cpu_notifier(&kvm_cpu_notifier);
out_free_1:
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ on_each_cpu(hardware_disable, NULL, 0, 1);
kvm_arch_ops->hardware_unsetup();
out:
kvm_arch_ops = NULL;
@@ -3067,7 +3089,7 @@ void kvm_exit_arch(void)
sysdev_class_unregister(&kvm_sysdev_class);
unregister_reboot_notifier(&kvm_reboot_notifier);
unregister_cpu_notifier(&kvm_cpu_notifier);
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ on_each_cpu(hardware_disable, NULL, 0, 1);
kvm_arch_ops->hardware_unsetup();
kvm_arch_ops = NULL;
}
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9431101..576f2bb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct
srcu_notifier_head *nh,
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */
#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */
+#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task,
+ * not handling interrupts, soon dead */

/* Used for CPU hotplug events occuring while tasks are frozen due to a
suspend
* operation in progress
@@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct
srcu_notifier_head *nh,
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)

#endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 96ac21f..613edd2 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
*/

#include <linux/errno.h>
+#include <asm/system.h>

extern void cpu_idle(void);

@@ -61,6 +62,11 @@ int smp_call_function_single(int cpuid, void (*func)
(void *info), void *info,
* Call a function on all processors
*/
int on_each_cpu(void (*func) (void *info), void *info, int retry, int
wait);
+/*
+ * Call a function on one processor
+ */
+int on_cpu(int cpu, void (*func)(void *info), void *info,
+ int retry, int wait);

#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */
#define MSG_ALL 0x8001
@@ -96,6 +102,16 @@ static inline int up_smp_call_function(void)
local_irq_enable(); \
0; \
})
+
+static inline int on_cpu(int cpu, void (*func)(void *info), void *info,
+ int retry, int wait)
+{
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ return 0;
+}
+
static inline void smp_send_reschedule(int cpu) { }
#define num_booting_cpus() 1
#define smp_prepare_boot_cpu() do {} while (0)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 208cf34..181ae70 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
write_unlock_irq(&tasklist_lock);
}

+struct take_cpu_down_param {
+ unsigned long mod;
+ void *hcpu;
+};
+
/* Take this CPU down. */
-static int take_cpu_down(void *unused)
+static int take_cpu_down(void *_param)
{
+ struct take_cpu_down_param *param = _param;
int err;

+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int
tasks_frozen)
cpumask_t old_allowed, tmp;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
+ .mod = mod,
+ .hcpu = hcpu,
+ };

if (num_online_cpus() == 1)
return -EBUSY;
@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
set_cpus_allowed(current, tmp);

mutex_lock(&cpu_bitmask_lock);
- p = __stop_machine_run(take_cpu_down, NULL, cpu);
+ p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
mutex_unlock(&cpu_bitmask_lock);

if (IS_ERR(p) || cpu_online(cpu)) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f57854b..584953a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
static int cpuset_handle_cpuhp(struct notifier_block *nb,
unsigned long phase, void *cpu)
{
+ if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
+ return NOTIFY_DONE;
+
common_cpu_mem_hotplug_unplug();
return 0;
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0b9886a..11666f7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -658,4 +658,28 @@ int on_each_cpu(void (*func) (void *info), void
*info, int retry, int wait)
return ret;
}
EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Call a function on one processor, which might be the currently executing
+ * processor.
+ */
+int on_cpu(int cpu, void (*func) (void *info), void *info,
+ int retry, int wait)
+{
+ int ret;
+ int this_cpu;
+
+ this_cpu = get_cpu();
+ if (this_cpu == cpu) {
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ ret = 0;
+ } else
+ ret = smp_call_function_single(cpu, func, info, retry, wait);
+ put_cpu();
+ return ret;
+}
+EXPORT_SYMBOL(on_cpu);
+
#endif

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel


2007-06-01 15:22:54

by Linus Torvalds

[permalink] [raw]
Subject: Re: [GIT PULL][RESEND] KVM cpu hotplug fixes



On Fri, 1 Jun 2007, Avi Kivity wrote:
>
> Please pull from the repository and branch

No. Not after -rc1. Not for something that changes core code and isn't a
core feature, and wasn't a regression.

> The core issue is that we need a notification [...]

No. The core issue here is that people need to understand that if you miss
the merge window, and it's new development, YOU DAMN WELL WAIT FOR THE
NEXT ONE.

Don't send me pull requests like this. And absolutely do NOT send them as
resends. I just get grumpy.

If all the added code had been KVM-only, I might not care. But when the
bulk of the code touches core files, you had better explain why this is so
important that it cannot wait for the next merge window.

Linus

2007-06-01 16:32:23

by Avi Kivity

[permalink] [raw]
Subject: Re: [GIT PULL][RESEND] KVM cpu hotplug fixes

Linus Torvalds wrote:
> Don't send me pull requests like this. And absolutely do NOT send them as
> resends. I just get grumpy.
>
> If all the added code had been KVM-only, I might not care. But when the
> bulk of the code touches core files, you had better explain why this is so
> important that it cannot wait for the next merge window.

Ok. This was really wanted by laptop-carrying users, but I guess
they'll have to wait for 2.6.23.


--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

2007-06-02 07:04:11

by Jun Koi

[permalink] [raw]
Subject: Re: [kvm-devel] [GIT PULL][RESEND] KVM cpu hotplug fixes

Hi Avi,

On 6/2/07, Avi Kivity <[email protected]> wrote:
> Linus Torvalds wrote:
> > Don't send me pull requests like this. And absolutely do NOT send them as
> > resends. I just get grumpy.
> >
> > If all the added code had been KVM-only, I might not care. But when the
> > bulk of the code touches core files, you had better explain why this is so
> > important that it cannot wait for the next merge window.
>
> Ok. This was really wanted by laptop-carrying users, but I guess
> they'll have to wait for 2.6.23.
>

Could you explain a bit:

1) Why CPU hotplug is that important for virtualization?

2) Why this patch is wanted by laptop users?

I tried to answer those questions myself, but to no avail.

Thank you,
Jun

2007-06-02 07:33:16

by Avi Kivity

[permalink] [raw]
Subject: Re: [kvm-devel] [GIT PULL][RESEND] KVM cpu hotplug fixes

Jun Koi wrote:
> Hi Avi,
>
> On 6/2/07, Avi Kivity <[email protected]> wrote:
>> Linus Torvalds wrote:
>> > Don't send me pull requests like this. And absolutely do NOT send
>> them as
>> > resends. I just get grumpy.
>> >
>> > If all the added code had been KVM-only, I might not care. But when
>> the
>> > bulk of the code touches core files, you had better explain why
>> this is so
>> > important that it cannot wait for the next merge window.
>>
>> Ok. This was really wanted by laptop-carrying users, but I guess
>> they'll have to wait for 2.6.23.
>>
>
> Could you explain a bit:
>
> 1) Why CPU hotplug is that important for virtualization?
>

It's not specifically important for virtualization. It's a Linux kernel
feature, and right now it doesn't work very well when kvm is enabled,
and that should be fixed.


> 2) Why this patch is wanted by laptop users?

CPU hotplug is used by suspend/resume (basically, suspend hot-unplugs
all cpus except one and then suspends the machine. Resume hotplugs the
cpus back after restarting the devices).

--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.