Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/kernel/paravirt.c | 8 ++++++++
arch/x86/kernel/process_32.c | 10 ++++++++--
arch/x86/kernel/process_64.c | 9 +++++++--
arch/x86/kernel/smpboot.c | 8 ++++----
include/asm-x86/paravirt.h | 27 +++++++++++++++++++++++++++
include/asm-x86/smp.h | 13 +++++++++++--
6 files changed, 65 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7cd2747..8ffe4d1 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -125,6 +125,7 @@ static void *get_call_destination(u8 type)
.pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
.pv_lock_ops = pv_lock_ops,
+ .pv_hotplug_ops = pv_hotplug_ops,
};
return *((void **)&tmpl + type);
}
@@ -455,9 +456,16 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_fixmap = native_set_fixmap,
};
+struct pv_hotplug_ops pv_hotplug_ops = {
+ .cpu_die = native_cpu_die,
+ .cpu_disable = native_cpu_disable,
+ .play_dead = native_play_dead,
+};
+
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
+EXPORT_SYMBOL (pv_hotplug_ops);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 593b73e..53887cd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -77,6 +77,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define play_dead native_play_dead
+#endif
+
static void cpu_exit_clear(void)
{
int cpu = raw_smp_processor_id();
@@ -93,7 +99,7 @@ static void cpu_exit_clear(void)
}
/* We don't actually take CPU down, just spin without interrupts. */
-static inline void play_dead(void)
+void native_play_dead(void)
{
/* This must be done before dead CPU ack */
cpu_exit_clear();
@@ -109,7 +115,7 @@ static inline void play_dead(void)
wbinvd_halt();
}
#else
-static inline void play_dead(void)
+void play_dead(void)
{
BUG();
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8248dc0..7512044 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -86,13 +86,18 @@ void exit_idle(void)
return;
__exit_idle();
}
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define play_dead native_play_dead
+#endif
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
#include <linux/nmi.h>
/* We halt the CPU with physical CPU hotplug */
-static inline void play_dead(void)
+void native_play_dead(void)
{
idle_task_exit();
mb();
@@ -104,7 +109,7 @@ static inline void play_dead(void)
wbinvd_halt();
}
#else
-static inline void play_dead(void)
+void native_play_dead(void)
{
BUG();
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eea2fd6..2a555a1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1340,7 +1340,7 @@ static void __ref remove_cpu_from_maps(int cpu)
numa_remove_cpu(cpu);
}
-int __cpu_disable(void)
+int native_cpu_disable(void)
{
int cpu = smp_processor_id();
@@ -1379,7 +1379,7 @@ int __cpu_disable(void)
return 0;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
@@ -1397,12 +1397,12 @@ void __cpu_die(unsigned int cpu)
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
+int native_cpu_disable(void)
{
return -ENOSYS;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index eca8c4f..fd922c3 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -332,6 +332,12 @@ struct pv_lock_ops {
void (*spin_unlock)(struct raw_spinlock *lock);
};
+struct pv_hotplug_ops {
+ void (*play_dead)(void);
+ int (*cpu_disable)(void);
+ void (*cpu_die)(unsigned int);
+};
+
/* This contains all the paravirt structures: we get a convenient
* number for each function using the offset which we use to indicate
* what to patch. */
@@ -343,6 +349,7 @@ struct paravirt_patch_template {
struct pv_apic_ops pv_apic_ops;
struct pv_mmu_ops pv_mmu_ops;
struct pv_lock_ops pv_lock_ops;
+ struct pv_hotplug_ops pv_hotplug_ops;
};
extern struct pv_info pv_info;
@@ -353,6 +360,7 @@ extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;
extern struct pv_lock_ops pv_lock_ops;
+extern struct pv_hotplug_ops pv_hotplug_ops;
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -1408,6 +1416,25 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+
+static inline int __cpu_disable(void)
+{
+ return PVOP_CALL0(int, pv_hotplug_ops.cpu_disable);
+}
+
+static inline void __cpu_die(unsigned int cpu)
+{
+ PVOP_VCALL1(pv_hotplug_ops.cpu_die, cpu);
+}
+
+static inline void play_dead(void)
+{
+ PVOP_VCALL0(pv_hotplug_ops.play_dead);
+}
+
+#endif
+
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
u8 *instr; /* original instructions */
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 29324c1..f7d153f 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -113,11 +113,20 @@ void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
int native_cpu_up(unsigned int cpunum);
+int native_cpu_disable(void);
+void native_cpu_die(unsigned int cpu);
+void native_play_dead(void);
+
void native_send_call_func_ipi(cpumask_t mask);
void native_send_call_func_single_ipi(int cpu);
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define __cpu_disable native_cpu_disable
+#define __cpu_die native_cpu_die
+#endif
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
--
1.5.4.3
The native case of wbinvd_halt uses inline asm to ensure the compiler doesn't reorder the sequence of instructions, which has the side effect of skirting around the paravirt hooks for those instructions. Thus, create a new hook for this case.
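For reference, the native sequence in question looks roughly like this (a sketch of the processor.h function this patch renames; the single asm block is what prevents compiler reordering, and also what bypasses the paravirt cli/wbinvd/hlt hooks):

	static inline void native_wbinvd_halt(void)
	{
		mb();
		/* check for clflush to determine if wbinvd is legal */
		if (cpu_has_clflush)
			/* cli, wbinvd and hlt issued as one asm block, so
			   the paravirt hooks for these never run */
			asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
		else
			while (1)
				halt();
	}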
Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/kernel/paravirt.c | 1 +
include/asm-x86/paravirt.h | 6 ++++++
include/asm-x86/processor.h | 3 ++-
3 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8ffe4d1..66d6d0d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -299,6 +299,7 @@ struct pv_irq_ops pv_irq_ops = {
.restore_fl = native_restore_fl,
.irq_disable = native_irq_disable,
.irq_enable = native_irq_enable,
+ .wbinvd_halt = native_wbinvd_halt,
.safe_halt = native_safe_halt,
.halt = native_halt,
#ifdef CONFIG_X86_64
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fd922c3..5360f3f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -194,6 +194,7 @@ struct pv_irq_ops {
void (*restore_fl)(unsigned long);
void (*irq_disable)(void);
void (*irq_enable)(void);
+ void (*wbinvd_halt)(void);
void (*safe_halt)(void);
void (*halt)(void);
@@ -716,6 +717,11 @@ static inline void raw_safe_halt(void)
PVOP_VCALL0(pv_irq_ops.safe_halt);
}
+static inline void wbinvd_halt(void)
+{
+ PVOP_VCALL0(pv_irq_ops.wbinvd_halt);
+}
+
static inline void halt(void)
{
PVOP_VCALL0(pv_irq_ops.safe_halt);
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index bcfc727..75db315 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -558,6 +558,7 @@ static inline void load_sp0(struct tss_struct *tss,
native_load_sp0(tss, thread);
}
+#define wbinvd_halt native_wbinvd_halt
#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT */
@@ -733,7 +734,7 @@ extern unsigned long idle_nomwait;
*
* Systems without cache can just go into halt.
*/
-static inline void wbinvd_halt(void)
+static inline void native_wbinvd_halt(void)
{
mb();
/* check for clflush to determine if wbinvd is legal */
--
1.5.4.3
Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/xen/irq.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb04260..4e3f7f7 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -122,12 +122,19 @@ static void xen_halt(void)
xen_safe_halt();
}
+static void xen_wbinvd_halt(void)
+{
+ native_wbinvd();
+ xen_halt();
+}
+
static const struct pv_irq_ops xen_irq_ops __initdata = {
.init_IRQ = __xen_init_IRQ,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
+ .wb_invd_halt = xen_wbinvd_halt,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
--
1.5.4.3
The removal of the CPU from the various maps was redundant, as it already happens in cpu_disable.
After cleaning this up, cpu_uninit only resets the TLB state, so rename it accordingly and create a no-op version for the X86_64 case (so the two play_dead implementations can be unified later).
Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/common.c | 6 +-----
arch/x86/kernel/process_32.c | 17 ++++-------------
include/asm-x86/smp.h | 7 ++++++-
3 files changed, 11 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d3bc82f..531e054 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -755,14 +755,10 @@ void __cpuinit cpu_init(void)
xsave_init();
}
-#ifdef CONFIG_HOTPLUG_CPU
-void __cpuinit cpu_uninit(void)
+void reset_lazy_tlbstate(void)
{
int cpu = raw_smp_processor_id();
- cpu_clear(cpu, cpu_initialized);
- /* lazy TLB state */
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}
-#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 53887cd..2db1746 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -83,26 +83,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
#define play_dead native_play_dead
#endif
-static void cpu_exit_clear(void)
+/* We don't actually take CPU down, just spin without interrupts. */
+void native_play_dead(void)
{
int cpu = raw_smp_processor_id();
idle_task_exit();
- cpu_uninit();
- irq_ctx_exit(cpu);
+ reset_lazy_tlbstate();
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
-
- numa_remove_cpu(cpu);
-}
+ irq_ctx_exit(cpu);
-/* We don't actually take CPU down, just spin without interrupts. */
-void native_play_dead(void)
-{
- /* This must be done before dead CPU ack */
- cpu_exit_clear();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index f7d153f..2b97b4c 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -215,7 +215,12 @@ static inline int hard_smp_processor_id(void)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_HOTPLUG_CPU
-extern void cpu_uninit(void);
+#ifdef CONFIG_X86_32
+extern void reset_lazy_tlbstate(void);
+#else
+static inline void reset_lazy_tlbstate(void)
+{ }
+#endif /* CONFIG_X86_32 */
#endif
#endif /* __ASSEMBLY__ */
--
1.5.4.3
Move the new unified play_dead into smpboot.c, as it fits more cleanly there alongside the other CONFIG_HOTPLUG_CPU functions.
Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/kernel/process_32.c | 38 --------------------------------------
arch/x86/kernel/process_64.c | 28 ----------------------------
arch/x86/kernel/smpboot.c | 25 +++++++++++++++++++++++++
include/asm-x86/smp.h | 1 +
4 files changed, 26 insertions(+), 66 deletions(-)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2db1746..aff137c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -74,44 +74,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return ((unsigned long *)tsk->thread.sp)[3];
}
-#ifdef CONFIG_HOTPLUG_CPU
-#include <asm/nmi.h>
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define play_dead native_play_dead
-#endif
-
-/* We don't actually take CPU down, just spin without interrupts. */
-void native_play_dead(void)
-{
- int cpu = raw_smp_processor_id();
-
- idle_task_exit();
-
- reset_lazy_tlbstate();
-
- irq_ctx_exit(cpu);
-
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /*
- * With physical CPU hotplug, we should halt the cpu
- */
- local_irq_disable();
- /* mask all interrupts, flush any and all caches, and halt */
- wbinvd_halt();
-}
-#else
-void play_dead(void)
-{
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7512044..b2bab8e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -86,34 +86,6 @@ void exit_idle(void)
return;
__exit_idle();
}
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define play_dead native_play_dead
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-DECLARE_PER_CPU(int, cpu_state);
-
-#include <linux/nmi.h>
-/* We halt the CPU with physical CPU hotplug */
-void native_play_dead(void)
-{
- idle_task_exit();
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- local_irq_disable();
- /* mask all interrupts, flush any and all caches, and halt */
- wbinvd_halt();
-}
-#else
-void native_play_dead(void)
-{
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
/*
* The idle thread. There's no useful work to be
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2a555a1..83e9591 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1396,6 +1396,25 @@ void native_cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
+
+void native_play_dead(void)
+{
+ idle_task_exit();
+ reset_lazy_tlbstate();
+ irq_ctx_exit(raw_smp_processor_id());
+
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
+}
+
#else /* ... !CONFIG_HOTPLUG_CPU */
int native_cpu_disable(void)
{
@@ -1407,4 +1426,10 @@ void native_cpu_die(unsigned int cpu)
/* We said "no" in __cpu_disable */
BUG();
}
+
+void native_play_dead(void)
+{
+ BUG();
+}
+
#endif
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 2b97b4c..fd2a070 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -126,6 +126,7 @@ void native_send_call_func_single_ipi(int cpu);
#else
#define __cpu_disable native_cpu_disable
#define __cpu_die native_cpu_die
+#define play_dead native_play_dead
#endif
void smp_store_cpu_info(int id);
--
1.5.4.3
Factor the common code out of native_cpu_disable into cpu_disable_common. This allows paravirt implementations of cpu_disable to share the cpu_disable_common code without having to take on board the APIC writes, which may not be appropriate for them.
Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/kernel/smpboot.c | 40 +++++++++++++++++++++++-----------------
include/asm-x86/smp.h | 1 +
2 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 83e9591..ccb3b12 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1340,25 +1340,9 @@ static void __ref remove_cpu_from_maps(int cpu)
numa_remove_cpu(cpu);
}
-int native_cpu_disable(void)
+void cpu_disable_common(void)
{
int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
- clear_local_APIC();
-
/*
* HACK:
* Allow any queued timer interrupts to get serviced
@@ -1376,6 +1360,28 @@ int native_cpu_disable(void)
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
+}
+
+int native_cpu_disable(void)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
+ clear_local_APIC();
+
+ cpu_disable_common();
return 0;
}
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index fd2a070..bff3fa6 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -109,6 +109,7 @@ static inline void arch_send_call_function_ipi(cpumask_t mask)
smp_ops.send_call_func_ipi(mask);
}
+void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
--
1.5.4.3
Note the changes from 2.6.18-xen CPU hotplugging:
A vcpu_down request from the remote admin via Xenbus both hot-unplugs the CPU and disables it, by removing it from the cpu_present map and removing its entry in /sys.
A vcpu_up request from the remote admin only re-enables the CPU; it does not immediately bring the CPU up. A udev event is emitted, which the user can catch if he wishes to automatically re-up CPUs when they become available, or to implement a more complex policy.
Signed-off-by: Alex Nixon <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/x86/xen/enlighten.c | 7 +++
arch/x86/xen/irq.c | 2 +-
arch/x86/xen/smp.c | 52 +++++++++++++++++++++-----
arch/x86/xen/spinlock.c | 5 ++
arch/x86/xen/time.c | 8 ++++
arch/x86/xen/xen-ops.h | 6 +++
drivers/xen/Makefile | 2 +-
drivers/xen/cpu_hotplug.c | 90 +++++++++++++++++++++++++++++++++++++++++++++
drivers/xen/events.c | 4 ++
9 files changed, 164 insertions(+), 12 deletions(-)
create mode 100644 drivers/xen/cpu_hotplug.c
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c421049..204d64b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1342,6 +1342,12 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.set_fixmap = xen_set_fixmap,
};
+static const struct pv_hotplug_ops xen_hotplug_ops = {
+ .play_dead = xen_play_dead,
+ .cpu_disable = xen_cpu_disable,
+ .cpu_die = xen_cpu_die,
+};
+
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
@@ -1655,6 +1661,7 @@ asmlinkage void __init xen_start_kernel(void)
pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
+ pv_hotplug_ops = xen_hotplug_ops;
xen_init_irq_ops();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 4e3f7f7..f33f75b 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -134,7 +134,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
- .wb_invd_halt = xen_wbinvd_halt,
+ .wbinvd_halt = xen_wbinvd_halt,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index baca7f2..682eaa4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -12,7 +12,6 @@
* result, all CPUs are treated as if they're single-core and
* single-threaded.
*
- * This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
#include <linux/err.h>
@@ -61,11 +60,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static __cpuinit void cpu_bringup_and_idle(void)
+static __cpuinit void cpu_bringup(void)
{
int cpu = smp_processor_id();
cpu_init();
+ touch_softlockup_watchdog();
preempt_disable();
xen_enable_sysenter();
@@ -86,6 +86,11 @@ static __cpuinit void cpu_bringup_and_idle(void)
local_irq_enable();
wmb(); /* make sure everything is out */
+}
+
+static __cpuinit void cpu_bringup_and_idle(void)
+{
+ cpu_bringup();
cpu_idle();
}
@@ -209,8 +214,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
cpu_set(cpu, cpu_present_map);
}
-
- //init_xenbus_allowed_cpumask();
}
static __cpuinit int
@@ -278,12 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
struct task_struct *idle = idle_task(cpu);
int rc;
-#if 0
- rc = cpu_up_check(cpu);
- if (rc)
- return rc;
-#endif
-
#ifdef CONFIG_X86_64
/* Allocate node local memory for AP pdas */
WARN_ON(cpu == 0);
@@ -336,6 +333,41 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
+int xen_cpu_disable(void)
+{
+ unsigned int cpu = smp_processor_id();
+ if (cpu == 0)
+ return -EBUSY;
+
+ cpu_disable_common();
+
+ load_cr3(swapper_pg_dir);
+ return 0;
+}
+
+void xen_cpu_die(unsigned int cpu)
+{
+ while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+ current->state = TASK_UNINTERRUPTIBLE;
+ schedule_timeout(HZ/10);
+ }
+ unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+ xen_uninit_lock_cpu(cpu);
+ xen_teardown_timer(cpu);
+
+ if (num_online_cpus() == 1)
+ alternatives_smp_switch(0);
+}
+
+void xen_play_dead(void)
+{
+ native_play_dead();
+ cpu_bringup();
+}
+
static void stop_self(void *v)
{
int cpu = smp_processor_id();
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index bfb1707..74a5114 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -173,6 +173,11 @@ void __cpuinit xen_init_lock_cpu(int cpu)
printk("cpu %d spinlock event irq %d\n", cpu, irq);
}
+void xen_uninit_lock_cpu(int cpu)
+{
+ unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+}
+
void __init xen_init_spinlocks(void)
{
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 685b774..8034d69 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -452,6 +452,14 @@ void xen_setup_timer(int cpu)
setup_runstate_info(cpu);
}
+void xen_teardown_timer(int cpu)
+{
+ struct clock_event_device *evt;
+ BUG_ON(cpu == 0);
+ evt = &per_cpu(xen_clock_events, cpu);
+ unbind_from_irqhandler(evt->irq, NULL);
+}
+
void xen_setup_cpu_clockevents(void)
{
BUG_ON(preemptible());
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3c70ebc..a16e5b5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -33,6 +33,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
+void xen_teardown_timer(int cpu);
void xen_setup_cpu_clockevents(void);
unsigned long xen_tsc_khz(void);
void __init xen_time_init(void);
@@ -48,11 +49,16 @@ void xen_mark_init_mm_pinned(void);
void __init xen_setup_vcpu_info_placement(void);
+void xen_play_dead(void);
+void xen_cpu_die(unsigned int cpu);
+int xen_cpu_disable(void);
+
#ifdef CONFIG_SMP
void xen_smp_init(void);
void __init xen_init_spinlocks(void);
__cpuinit void xen_init_lock_cpu(int cpu);
+void xen_uninit_lock_cpu(int cpu);
extern cpumask_t xen_cpu_initialized_map;
#else
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 363286c..f62d8df 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y += grant-table.o features.o events.o manage.o
+obj-y += grant-table.o features.o events.o manage.o cpu_hotplug.o
obj-y += xenbus/
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
new file mode 100644
index 0000000..f1727ce
--- /dev/null
+++ b/drivers/xen/cpu_hotplug.c
@@ -0,0 +1,90 @@
+#include <linux/notifier.h>
+
+#include <xen/xenbus.h>
+
+#include <asm-x86/xen/hypervisor.h>
+#include <asm/cpu.h>
+
+static void enable_hotplug_cpu(int cpu)
+{
+ if (!cpu_present(cpu))
+ arch_register_cpu(cpu);
+
+ cpu_set(cpu, cpu_present_map);
+}
+
+static void disable_hotplug_cpu(int cpu)
+{
+ if (cpu_present(cpu))
+ arch_unregister_cpu(cpu);
+
+ cpu_clear(cpu, cpu_present_map);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+ int err;
+ char dir[32], state[32];
+
+ if (!cpu_possible(cpu))
+ return;
+
+ sprintf(dir, "cpu/%u", cpu);
+ err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
+ if (err != 1) {
+ printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+ return;
+ }
+
+ if (strcmp(state, "online") == 0) {
+ enable_hotplug_cpu(cpu);
+ } else if (strcmp(state, "offline") == 0) {
+ (void)cpu_down(cpu);
+ disable_hotplug_cpu(cpu);
+ } else {
+ printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+ state, cpu);
+ }
+}
+
+static void handle_vcpu_hotplug_event(
+ struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+ unsigned int cpu;
+ char *cpustr;
+ const char *node = vec[XS_WATCH_PATH];
+
+ cpustr = strstr(node, "cpu/");
+ if (cpustr != NULL) {
+ sscanf(cpustr, "cpu/%u", &cpu);
+ vcpu_hotplug(cpu);
+ }
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+ unsigned long event, void *data)
+{
+ static struct xenbus_watch cpu_watch = {
+ .node = "cpu",
+ .callback = handle_vcpu_hotplug_event};
+
+ (void)register_xenbus_watch(&cpu_watch);
+
+ return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+ static struct notifier_block xsn_cpu = {
+ .notifier_call = setup_cpu_watcher };
+
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ register_xenstore_notifier(&xsn_cpu);
+
+ return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 2a49ffc..63ca861 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -358,6 +358,10 @@ static void unbind_from_irq(unsigned int irq)
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
[index_from_irq(irq)] = -1;
break;
+ case IRQT_IPI:
+ per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
+ [index_from_irq(irq)] = -1;
+ break;
default:
break;
}
--
1.5.4.3
Alex Nixon wrote:
> The native case of wbinvd_halt uses inline asm to ensure the compiler doesn't reorder the sequence of instructions, which has the side effect of skirting around the paravirt hooks for those instructions. Thus, create a new hook for this case.
>
Not necessary. Halting in Xen doesn't require the caches to be flushed
in this way, and I think that would be true for any virtual environment
(and even if it were required, the invalidate/halt instruction sequence would work
anyway).
The correct fix is to make sure that native_play_dead calls this,
but xen_play_dead doesn't (i.e., make sure it's not in the common code
path).
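Something like this shape, say (a sketch -- the common_play_dead() name and the exact split are assumptions, not from the patch):

	void common_play_dead(void)
	{
		idle_task_exit();
		reset_lazy_tlbstate();
		irq_ctx_exit(raw_smp_processor_id());

		mb();
		/* Ack it */
		__get_cpu_var(cpu_state) = CPU_DEAD;

		local_irq_disable();
	}

	void native_play_dead(void)
	{
		common_play_dead();
		/* mask all interrupts, flush any and all caches, and halt:
		   this stays out of the shared path */
		wbinvd_halt();
	}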
J
> Signed-off-by: Alex Nixon <[email protected]>
> Cc: Jeremy Fitzhardinge <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> ---
> arch/x86/kernel/paravirt.c | 1 +
> include/asm-x86/paravirt.h | 6 ++++++
> include/asm-x86/processor.h | 3 ++-
> 3 files changed, 9 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
> index 8ffe4d1..66d6d0d 100644
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -299,6 +299,7 @@ struct pv_irq_ops pv_irq_ops = {
> .restore_fl = native_restore_fl,
> .irq_disable = native_irq_disable,
> .irq_enable = native_irq_enable,
> + .wbinvd_halt = native_wbinvd_halt,
> .safe_halt = native_safe_halt,
> .halt = native_halt,
> #ifdef CONFIG_X86_64
> diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
> index fd922c3..5360f3f 100644
> --- a/include/asm-x86/paravirt.h
> +++ b/include/asm-x86/paravirt.h
> @@ -194,6 +194,7 @@ struct pv_irq_ops {
> void (*restore_fl)(unsigned long);
> void (*irq_disable)(void);
> void (*irq_enable)(void);
> + void (*wbinvd_halt)(void);
> void (*safe_halt)(void);
> void (*halt)(void);
>
> @@ -716,6 +717,11 @@ static inline void raw_safe_halt(void)
> PVOP_VCALL0(pv_irq_ops.safe_halt);
> }
>
> +static inline void wbinvd_halt(void)
> +{
> + PVOP_VCALL0(pv_irq_ops.wbinvd_halt);
> +}
> +
> static inline void halt(void)
> {
> PVOP_VCALL0(pv_irq_ops.safe_halt);
> diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
> index bcfc727..75db315 100644
> --- a/include/asm-x86/processor.h
> +++ b/include/asm-x86/processor.h
> @@ -558,6 +558,7 @@ static inline void load_sp0(struct tss_struct *tss,
> native_load_sp0(tss, thread);
> }
>
> +#define wbinvd_halt native_wbinvd_halt
> #define set_iopl_mask native_set_iopl_mask
> #endif /* CONFIG_PARAVIRT */
>
> @@ -733,7 +734,7 @@ extern unsigned long idle_nomwait;
> *
> * Systems without cache can just go into halt.
> */
> -static inline void wbinvd_halt(void)
> +static inline void native_wbinvd_halt(void)
> {
> mb();
> /* check for clflush to determine if wbinvd is legal */
>
Alex Nixon wrote:
> Signed-off-by: Alex Nixon <[email protected]>
> Cc: Jeremy Fitzhardinge <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> ---
> arch/x86/kernel/paravirt.c | 8 ++++++++
> arch/x86/kernel/process_32.c | 10 ++++++++--
> arch/x86/kernel/process_64.c | 9 +++++++--
> arch/x86/kernel/smpboot.c | 8 ++++----
> include/asm-x86/paravirt.h | 27 +++++++++++++++++++++++++++
> include/asm-x86/smp.h | 13 +++++++++++--
> 6 files changed, 65 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
> index 7cd2747..8ffe4d1 100644
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -125,6 +125,7 @@ static void *get_call_destination(u8 type)
> .pv_apic_ops = pv_apic_ops,
> .pv_mmu_ops = pv_mmu_ops,
> .pv_lock_ops = pv_lock_ops,
> + .pv_hotplug_ops = pv_hotplug_ops,
>
I think I'd prefer these to be part of smp_ops. We're only talking
about three new operations, and they're the logical complements of the
existing smp_ops functions for bringing up cpus.
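That is, roughly this shape (a sketch; the exact placement of the fields in smp_ops is assumed here):

	struct smp_ops {
		/* ... existing bring-up ops: smp_prepare_cpus, cpu_up, ... */
		int (*cpu_disable)(void);
		void (*cpu_die)(unsigned int cpu);
		void (*play_dead)(void);
	};

	extern struct smp_ops smp_ops;

	static inline int __cpu_disable(void)
	{
		return smp_ops.cpu_disable();
	}

	static inline void __cpu_die(unsigned int cpu)
	{
		smp_ops.cpu_die(cpu);
	}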
J
> };
> return *((void **)&tmpl + type);
> }
> @@ -455,9 +456,16 @@ struct pv_mmu_ops pv_mmu_ops = {
> .set_fixmap = native_set_fixmap,
> };
>
> +struct pv_hotplug_ops pv_hotplug_ops = {
> + .cpu_die = native_cpu_die,
> + .cpu_disable = native_cpu_disable,
> + .play_dead = native_play_dead,
> +};
> +
> EXPORT_SYMBOL_GPL(pv_time_ops);
> EXPORT_SYMBOL (pv_cpu_ops);
> EXPORT_SYMBOL (pv_mmu_ops);
> EXPORT_SYMBOL_GPL(pv_apic_ops);
> EXPORT_SYMBOL_GPL(pv_info);
> EXPORT_SYMBOL (pv_irq_ops);
> +EXPORT_SYMBOL (pv_hotplug_ops);
> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
> index 593b73e..53887cd 100644
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -77,6 +77,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
> #ifdef CONFIG_HOTPLUG_CPU
> #include <asm/nmi.h>
>
> +#ifdef CONFIG_PARAVIRT
> +#include <asm/paravirt.h>
> +#else
> +#define play_dead native_play_dead
> +#endif
> +
> static void cpu_exit_clear(void)
> {
> int cpu = raw_smp_processor_id();
> @@ -93,7 +99,7 @@ static void cpu_exit_clear(void)
> }
>
> /* We don't actually take CPU down, just spin without interrupts. */
> -static inline void play_dead(void)
> +void native_play_dead(void)
> {
> /* This must be done before dead CPU ack */
> cpu_exit_clear();
> @@ -109,7 +115,7 @@ static inline void play_dead(void)
> wbinvd_halt();
> }
> #else
> -static inline void play_dead(void)
> +void play_dead(void)
> {
> BUG();
> }
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 8248dc0..7512044 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -86,13 +86,18 @@ void exit_idle(void)
> return;
> __exit_idle();
> }
> +#ifdef CONFIG_PARAVIRT
> +#include <asm/paravirt.h>
> +#else
> +#define play_dead native_play_dead
> +#endif
>
> #ifdef CONFIG_HOTPLUG_CPU
> DECLARE_PER_CPU(int, cpu_state);
>
> #include <linux/nmi.h>
> /* We halt the CPU with physical CPU hotplug */
> -static inline void play_dead(void)
> +void native_play_dead(void)
> {
> idle_task_exit();
> mb();
> @@ -104,7 +109,7 @@ static inline void play_dead(void)
> wbinvd_halt();
> }
> #else
> -static inline void play_dead(void)
> +void native_play_dead(void)
> {
> BUG();
> }
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index eea2fd6..2a555a1 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -1340,7 +1340,7 @@ static void __ref remove_cpu_from_maps(int cpu)
> numa_remove_cpu(cpu);
> }
>
> -int __cpu_disable(void)
> +int native_cpu_disable(void)
> {
> int cpu = smp_processor_id();
>
> @@ -1379,7 +1379,7 @@ int __cpu_disable(void)
> return 0;
> }
>
> -void __cpu_die(unsigned int cpu)
> +void native_cpu_die(unsigned int cpu)
> {
> /* We don't do anything here: idle task is faking death itself. */
> unsigned int i;
> @@ -1397,12 +1397,12 @@ void __cpu_die(unsigned int cpu)
> printk(KERN_ERR "CPU %u didn't die...\n", cpu);
> }
> #else /* ... !CONFIG_HOTPLUG_CPU */
> -int __cpu_disable(void)
> +int native_cpu_disable(void)
> {
> return -ENOSYS;
> }
>
> -void __cpu_die(unsigned int cpu)
> +void native_cpu_die(unsigned int cpu)
> {
> /* We said "no" in __cpu_disable */
> BUG();
> diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
> index eca8c4f..fd922c3 100644
> --- a/include/asm-x86/paravirt.h
> +++ b/include/asm-x86/paravirt.h
> @@ -332,6 +332,12 @@ struct pv_lock_ops {
> void (*spin_unlock)(struct raw_spinlock *lock);
> };
>
> +struct pv_hotplug_ops {
> + void (*play_dead)(void);
> + int (*cpu_disable)(void);
> + void (*cpu_die)(unsigned int);
> +};
> +
> /* This contains all the paravirt structures: we get a convenient
> * number for each function using the offset which we use to indicate
> * what to patch. */
> @@ -343,6 +349,7 @@ struct paravirt_patch_template {
> struct pv_apic_ops pv_apic_ops;
> struct pv_mmu_ops pv_mmu_ops;
> struct pv_lock_ops pv_lock_ops;
> + struct pv_hotplug_ops pv_hotplug_ops;
> };
>
> extern struct pv_info pv_info;
> @@ -353,6 +360,7 @@ extern struct pv_irq_ops pv_irq_ops;
> extern struct pv_apic_ops pv_apic_ops;
> extern struct pv_mmu_ops pv_mmu_ops;
> extern struct pv_lock_ops pv_lock_ops;
> +extern struct pv_hotplug_ops pv_hotplug_ops;
>
> #define PARAVIRT_PATCH(x) \
> (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
> @@ -1408,6 +1416,25 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
>
> #endif
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +
> +static inline int __cpu_disable(void)
> +{
> + return PVOP_CALL0(int, pv_hotplug_ops.cpu_disable);
> +}
> +
> +static inline void __cpu_die(unsigned int cpu)
> +{
> + PVOP_VCALL1(pv_hotplug_ops.cpu_die, cpu);
> +}
> +
> +static inline void play_dead(void)
> +{
> + PVOP_VCALL0(pv_hotplug_ops.play_dead);
> +}
> +
> +#endif
> +
> /* These all sit in the .parainstructions section to tell us what to patch. */
> struct paravirt_patch_site {
> u8 *instr; /* original instructions */
> diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
> index 29324c1..f7d153f 100644
> --- a/include/asm-x86/smp.h
> +++ b/include/asm-x86/smp.h
> @@ -113,11 +113,20 @@ void native_smp_prepare_boot_cpu(void);
> void native_smp_prepare_cpus(unsigned int max_cpus);
> void native_smp_cpus_done(unsigned int max_cpus);
> int native_cpu_up(unsigned int cpunum);
> +int native_cpu_disable(void);
> +void native_cpu_die(unsigned int cpu);
> +void native_play_dead(void);
> +
> void native_send_call_func_ipi(cpumask_t mask);
> void native_send_call_func_single_ipi(int cpu);
>
> -extern int __cpu_disable(void);
> -extern void __cpu_die(unsigned int cpu);
> +
> +#ifdef CONFIG_PARAVIRT
> +#include <asm/paravirt.h>
> +#else
> +#define __cpu_disable native_cpu_disable
> +#define __cpu_die native_cpu_die
> +#endif
>
> void smp_store_cpu_info(int id);
> #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
>
Alex Nixon wrote:
> Note the changes from 2.6.18-xen CPU hotplugging:
>
> A vcpu_down request from the remote admin via Xenbus both hot-unplugs the CPU and disables it, by removing it from the cpu_present map and removing its entry in /sys.
>
> A vcpu_up request from the remote admin only re-enables the CPU; it does not immediately bring the CPU up. A udev event is emitted, which the user can catch if he wishes to automatically re-up CPUs when they become available, or to implement a more complex policy.
>
Good, that's better.
> Signed-off-by: Alex Nixon <[email protected]>
> Cc: Jeremy Fitzhardinge <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> ---
> arch/x86/xen/enlighten.c | 7 +++
> arch/x86/xen/irq.c | 2 +-
> arch/x86/xen/smp.c | 52 +++++++++++++++++++++-----
> arch/x86/xen/spinlock.c | 5 ++
> arch/x86/xen/time.c | 8 ++++
> arch/x86/xen/xen-ops.h | 6 +++
> drivers/xen/Makefile | 2 +-
> drivers/xen/cpu_hotplug.c | 90 +++++++++++++++++++++++++++++++++++++++++++++
> drivers/xen/events.c | 4 ++
> 9 files changed, 164 insertions(+), 12 deletions(-)
> create mode 100644 drivers/xen/cpu_hotplug.c
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index c421049..204d64b 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1342,6 +1342,12 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
> .set_fixmap = xen_set_fixmap,
> };
>
> +static const struct pv_hotplug_ops xen_hotplug_ops = {
> + .play_dead = xen_play_dead,
> + .cpu_disable = xen_cpu_disable,
> + .cpu_die = xen_cpu_die,
> +};
> +
> static void xen_reboot(int reason)
> {
> struct sched_shutdown r = { .reason = reason };
> @@ -1655,6 +1661,7 @@ asmlinkage void __init xen_start_kernel(void)
> pv_cpu_ops = xen_cpu_ops;
> pv_apic_ops = xen_apic_ops;
> pv_mmu_ops = xen_mmu_ops;
> + pv_hotplug_ops = xen_hotplug_ops;
>
> xen_init_irq_ops();
>
> diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
> index 4e3f7f7..f33f75b 100644
> --- a/arch/x86/xen/irq.c
> +++ b/arch/x86/xen/irq.c
> @@ -134,7 +134,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
> .restore_fl = xen_restore_fl,
> .irq_disable = xen_irq_disable,
> .irq_enable = xen_irq_enable,
> - .wb_invd_halt = xen_wbinvd_halt,
> + .wbinvd_halt = xen_wbinvd_halt,
>
What's this? A typo fix? It should be folded back into the appropriate
patch (except for the fact that I think this op should be gone).
> .safe_halt = xen_safe_halt,
> .halt = xen_halt,
> #ifdef CONFIG_X86_64
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index baca7f2..682eaa4 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -12,7 +12,6 @@
> * result, all CPUs are treated as if they're single-core and
> * single-threaded.
> *
> - * This does not handle HOTPLUG_CPU yet.
>
Remove the blank line too.
> */
> #include <linux/sched.h>
> #include <linux/err.h>
> @@ -61,11 +60,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
> return IRQ_HANDLED;
> }
>
> -static __cpuinit void cpu_bringup_and_idle(void)
> +static __cpuinit void cpu_bringup(void)
> {
> int cpu = smp_processor_id();
>
> cpu_init();
> + touch_softlockup_watchdog();
> preempt_disable();
>
> xen_enable_sysenter();
> @@ -86,6 +86,11 @@ static __cpuinit void cpu_bringup_and_idle(void)
> local_irq_enable();
>
> wmb(); /* make sure everything is out */
> +}
> +
> +static __cpuinit void cpu_bringup_and_idle(void)
> +{
> + cpu_bringup();
> cpu_idle();
> }
>
> @@ -209,8 +214,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
>
> cpu_set(cpu, cpu_present_map);
> }
> -
> - //init_xenbus_allowed_cpumask();
> }
>
> static __cpuinit int
> @@ -278,12 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
> struct task_struct *idle = idle_task(cpu);
> int rc;
>
> -#if 0
> - rc = cpu_up_check(cpu);
> - if (rc)
> - return rc;
> -#endif
> -
> #ifdef CONFIG_X86_64
> /* Allocate node local memory for AP pdas */
> WARN_ON(cpu == 0);
> @@ -336,6 +333,41 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
> {
> }
>
> +int xen_cpu_disable(void)
> +{
> + unsigned int cpu = smp_processor_id();
> + if (cpu == 0)
> + return -EBUSY;
> +
> + cpu_disable_common();
> +
> + load_cr3(swapper_pg_dir);
> + return 0;
> +}
> +
> +void xen_cpu_die(unsigned int cpu)
> +{
> + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
> + current->state = TASK_UNINTERRUPTIBLE;
> + schedule_timeout(HZ/10);
> + }
> + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
> + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
> + unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
> + unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
> + xen_uninit_lock_cpu(cpu);
> + xen_teardown_timer(cpu);
> +
> + if (num_online_cpus() == 1)
> + alternatives_smp_switch(0);
> +}
> +
> +void xen_play_dead(void)
> +{
> + native_play_dead();
> + cpu_bringup();
>
No, call common_play_dead(), then xen_halt(), then cpu_bringup().
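i.e., roughly this (using the hypothetical common_play_dead() helper from the earlier reply):

	void xen_play_dead(void)
	{
		common_play_dead();
		xen_halt();	/* with irqs off this takes the vcpu down */
		cpu_bringup();	/* resumes here if the vcpu is brought back up */
	}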
> +}
> +
> static void stop_self(void *v)
> {
> int cpu = smp_processor_id();
> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
> index bfb1707..74a5114 100644
> --- a/arch/x86/xen/spinlock.c
> +++ b/arch/x86/xen/spinlock.c
> @@ -173,6 +173,11 @@ void __cpuinit xen_init_lock_cpu(int cpu)
> printk("cpu %d spinlock event irq %d\n", cpu, irq);
> }
>
> +void xen_uninit_lock_cpu(int cpu)
> +{
> + unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
> +}
> +
> void __init xen_init_spinlocks(void)
> {
> pv_lock_ops.spin_is_locked = xen_spin_is_locked;
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 685b774..8034d69 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -452,6 +452,14 @@ void xen_setup_timer(int cpu)
> setup_runstate_info(cpu);
> }
>
> +void xen_teardown_timer(int cpu)
> +{
> + struct clock_event_device *evt;
> + BUG_ON(cpu == 0);
> + evt = &per_cpu(xen_clock_events, cpu);
> + unbind_from_irqhandler(evt->irq, NULL);
> +}
> +
> void xen_setup_cpu_clockevents(void)
> {
> BUG_ON(preemptible());
> diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
> index 3c70ebc..a16e5b5 100644
> --- a/arch/x86/xen/xen-ops.h
> +++ b/arch/x86/xen/xen-ops.h
> @@ -33,6 +33,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
>
> void xen_init_irq_ops(void);
> void xen_setup_timer(int cpu);
> +void xen_teardown_timer(int cpu);
> void xen_setup_cpu_clockevents(void);
> unsigned long xen_tsc_khz(void);
> void __init xen_time_init(void);
> @@ -48,11 +49,16 @@ void xen_mark_init_mm_pinned(void);
>
> void __init xen_setup_vcpu_info_placement(void);
>
> +void xen_play_dead(void);
> +void xen_cpu_die(unsigned int cpu);
> +int xen_cpu_disable(void);
> +
> #ifdef CONFIG_SMP
> void xen_smp_init(void);
>
> void __init xen_init_spinlocks(void);
> __cpuinit void xen_init_lock_cpu(int cpu);
> +void xen_uninit_lock_cpu(int cpu);
>
> extern cpumask_t xen_cpu_initialized_map;
> #else
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 363286c..f62d8df 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -1,4 +1,4 @@
> -obj-y += grant-table.o features.o events.o manage.o
> +obj-y += grant-table.o features.o events.o manage.o cpu_hotplug.o
> obj-y += xenbus/
> obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
> obj-$(CONFIG_XEN_BALLOON) += balloon.o
> diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
> new file mode 100644
> index 0000000..f1727ce
> --- /dev/null
> +++ b/drivers/xen/cpu_hotplug.c
> @@ -0,0 +1,90 @@
> +#include <linux/notifier.h>
> +
> +#include <xen/xenbus.h>
> +
> +#include <asm-x86/xen/hypervisor.h>
> +#include <asm/cpu.h>
> +
> +static void enable_hotplug_cpu(int cpu)
> +{
> + if (!cpu_present(cpu))
> + arch_register_cpu(cpu);
> +
> + cpu_set(cpu, cpu_present_map);
> +}
> +
> +static void disable_hotplug_cpu(int cpu)
> +{
> + if (cpu_present(cpu))
> + arch_unregister_cpu(cpu);
> +
> + cpu_clear(cpu, cpu_present_map);
> +}
> +
> +static void vcpu_hotplug(unsigned int cpu)
> +{
> + int err;
> + char dir[32], state[32];
> +
> + if (!cpu_possible(cpu))
> + return;
> +
> + sprintf(dir, "cpu/%u", cpu);
> + err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
> + if (err != 1) {
> + printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
> + return;
> + }
> +
> + if (strcmp(state, "online") == 0) {
> + enable_hotplug_cpu(cpu);
> + } else if (strcmp(state, "offline") == 0) {
> + (void)cpu_down(cpu);
> + disable_hotplug_cpu(cpu);
> + } else {
> + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
> + state, cpu);
> + }
> +}
> +
> +static void handle_vcpu_hotplug_event(
> + struct xenbus_watch *watch, const char **vec, unsigned int len)
>
Fix the formatting here (split the line at a ',').
> +{
> + unsigned int cpu;
> + char *cpustr;
> + const char *node = vec[XS_WATCH_PATH];
> +
> + cpustr = strstr(node, "cpu/");
> + if (cpustr != NULL) {
> + sscanf(cpustr, "cpu/%u", &cpu);
> + vcpu_hotplug(cpu);
> + }
> +}
> +
> +static int setup_cpu_watcher(struct notifier_block *notifier,
> + unsigned long event, void *data)
> +{
> + static struct xenbus_watch cpu_watch = {
> + .node = "cpu",
> + .callback = handle_vcpu_hotplug_event};
> +
> + (void)register_xenbus_watch(&cpu_watch);
> +
> + return NOTIFY_DONE;
> +}
> +
> +static int __init setup_vcpu_hotplug_event(void)
> +{
> + static struct notifier_block xsn_cpu = {
> + .notifier_call = setup_cpu_watcher };
> +
> + if (!is_running_on_xen())
> + return -ENODEV;
> +
> + register_xenstore_notifier(&xsn_cpu);
> +
> + return 0;
> +}
> +
> +arch_initcall(setup_vcpu_hotplug_event);
> +
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 2a49ffc..63ca861 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -358,6 +358,10 @@ static void unbind_from_irq(unsigned int irq)
> per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
> [index_from_irq(irq)] = -1;
> break;
> + case IRQT_IPI:
> + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
> + [index_from_irq(irq)] = -1;
> + break;
> default:
> break;
> }
>