We can know the guest is panicked when the guest runs on xen.
But we do not have such feature on kvm.
Another purpose of this feature is: management app(for example:
libvirt) can do auto dump when the guest is panicked. If management
app does not do auto dump, the guest's user can do dump by hand if
he sees the guest is panicked.
We have three solutions to implement this feature:
1. use vmcall
2. use I/O port
3. use virtio-serial.
We have decided to avoid touching hypervisor. The reason why I choose
choose the I/O port is:
1. it is easier to implememt
2. it does not depend any virtual device
3. it can work when starting the kernel
Signed-off-by: Wen Congyang <[email protected]>
Signed-off-by: Hu Tao <[email protected]>
---
changes from v10:
- add a kernel parameter to disable pv-event
- detailed documentation to describe pv event interface
- make kvm_pv_event_init() local
Documentation/virtual/kvm/pv_event.txt | 38 +++++++++++++++++++++++++
arch/ia64/include/asm/kvm_para.h | 14 ++++++++++
arch/powerpc/include/asm/kvm_para.h | 14 ++++++++++
arch/s390/include/asm/kvm_para.h | 14 ++++++++++
arch/x86/include/asm/kvm_para.h | 21 ++++++++++++++
arch/x86/kernel/kvm.c | 48 ++++++++++++++++++++++++++++++++
include/linux/kvm_para.h | 18 ++++++++++++
7 files changed, 167 insertions(+)
create mode 100644 Documentation/virtual/kvm/pv_event.txt
diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
new file mode 100644
index 0000000..247379f
--- /dev/null
+++ b/Documentation/virtual/kvm/pv_event.txt
@@ -0,0 +1,38 @@
+The KVM Paravirtual Event Interface
+=================================
+
+The KVM Paravirtual Event Interface defines a simple interface,
+by which guest OS can inform hypervisor that something happened.
+
+To inform hypervisor of events, guest writes a 32-bit integer to
+the Interface. Each bit of the integer represents an event, if a
+bit is set, the corresponding event happens.
+
+To query events supported by hypervisor, guest reads from the
+Interface. If a bit is set, the corresponding event is supported.
+
+The Interface supports up to 32 events. Currently there is 1 event
+defined, as follow:
+
+KVM_PV_FEATURE_PANICKED 0
+
+
+Querying whether the event can be ejected
+======================
+kvm_pv_has_feature()
+Arguments:
+ feature: The bit value of this paravirtual event to query
+
+Return Value:
+ 0: The guest kernel can't eject this paravirtual event.
+ 1: The guest kernel can eject this paravirtual event.
+
+
+Ejecting paravirtual event
+======================
+kvm_pv_eject_event()
+Arguments:
+ event: The event to be ejected.
+
+Return Value:
+ None
diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
index 2019cb9..b5ec658 100644
--- a/arch/ia64/include/asm/kvm_para.h
+++ b/arch/ia64/include/asm/kvm_para.h
@@ -31,6 +31,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
return false;
}
+static inline int kvm_arch_pv_event_init(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
#endif
#endif
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index c18916b..01b98c7 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -211,6 +211,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
return false;
}
+static inline int kvm_arch_pv_event_init(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
#endif /* __KERNEL__ */
#endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index da44867..00ce058 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -154,6 +154,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
return false;
}
+static inline int kvm_arch_pv_event_init(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
#endif
#endif /* __S390_KVM_PARA_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index eb3e9d8..4315af6 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
#define KVM_PV_EOI_DISABLED 0x0
+#define KVM_PV_EVENT_PORT (0x505UL)
+
#ifdef __KERNEL__
#include <asm/processor.h>
+#include <linux/ioport.h>
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
@@ -228,6 +231,24 @@ static inline void kvm_disable_steal_time(void)
}
#endif
+static inline int kvm_arch_pv_event_init(void)
+{
+ if (!request_region(KVM_PV_EVENT_PORT, 4, "KVM_PV_EVENT"))
+ return -1;
+
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return inl(KVM_PV_EVENT_PORT);
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+ outl(event, KVM_PV_EVENT_PORT);
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4180a87..c44e46f 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -62,6 +62,15 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
+static int pv_event = 1;
+static int parse_no_pv_event(char *arg)
+{
+ pv_event = 0;
+ return 0;
+}
+
+early_param("no-pv-event", parse_no_pv_event);
+
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
@@ -372,6 +381,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
.notifier_call = kvm_pv_reboot_notify,
};
+static int
+kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
+{
+ kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_panic_nb = {
+ .notifier_call = kvm_pv_panic_notify,
+};
+
static u64 kvm_steal_clock(int cpu)
{
u64 steal;
@@ -449,6 +469,34 @@ static void __init kvm_apf_trap_init(void)
set_intr_gate(14, &async_page_fault);
}
+static void __init kvm_pv_panicked_event_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kvm_pv_panic_nb);
+}
+
+static inline int kvm_pv_event_init(void)
+{
+ return kvm_arch_pv_event_init();
+}
+
+static int __init enable_pv_event(void)
+{
+ if (pv_event) {
+ if (kvm_pv_event_init())
+ return 0;
+
+ kvm_pv_panicked_event_init();
+ }
+
+ return 0;
+}
+arch_initcall(enable_pv_event);
+
void __init kvm_guest_init(void)
{
int i;
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index ff476dd..495e411 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -20,6 +20,12 @@
#define KVM_HC_FEATURES 3
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
+/* The bit of supported pv event */
+#define KVM_PV_FEATURE_PANICKED 0
+
+/* The pv event value */
+#define KVM_PV_EVENT_PANICKED 1
+
/*
* hypercalls use architecture specific
*/
@@ -33,5 +39,17 @@ static inline int kvm_para_has_feature(unsigned int feature)
return 1;
return 0;
}
+
+static inline int kvm_pv_has_feature(unsigned int feature)
+{
+ if (kvm_arch_pv_features() & (1UL << feature))
+ return 1;
+ return 0;
+}
+
+static inline void kvm_pv_eject_event(unsigned int event)
+{
+ kvm_arch_pv_eject_event(event);
+}
#endif /* __KERNEL__ */
#endif /* __LINUX_KVM_PARA_H */
--
1.7.10.2
update kernel headers to add pv event macros.
Signed-off-by: Wen Congyang <[email protected]>
Signed-off-by: Hu Tao <[email protected]>
---
linux-headers/asm-x86/kvm_para.h | 1 +
linux-headers/linux/kvm_para.h | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
index a1c3d72..781959a 100644
--- a/linux-headers/asm-x86/kvm_para.h
+++ b/linux-headers/asm-x86/kvm_para.h
@@ -96,5 +96,6 @@ struct kvm_vcpu_pv_apf_data {
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
#define KVM_PV_EOI_DISABLED 0x0
+#define KVM_PV_EVENT_PORT (0x505UL)
#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
index 7bdcf93..f6be0bb 100644
--- a/linux-headers/linux/kvm_para.h
+++ b/linux-headers/linux/kvm_para.h
@@ -20,6 +20,12 @@
#define KVM_HC_FEATURES 3
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
+/* The bit of supported pv event */
+#define KVM_PV_FEATURE_PANICKED 0
+
+/* The pv event value */
+#define KVM_PV_EVENT_PANICKED 1
+
/*
* hypercalls use architecture specific
*/
--
1.7.10.2
From: Wen Congyang <[email protected]>
The guest will be in this state when it is panicked.
Signed-off-by: Wen Congyang <[email protected]>
---
qapi-schema.json | 6 +++++-
qmp.c | 3 ++-
vl.c | 7 ++++++-
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/qapi-schema.json b/qapi-schema.json
index c615ee2..25a21eb 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -174,11 +174,15 @@
# @suspended: guest is suspended (ACPI S3)
#
# @watchdog: the watchdog action is configured to pause and has been triggered
+#
+# @guest-panicked: the panicked action is configured to pause and has been
+# triggered.
##
{ 'enum': 'RunState',
'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
- 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] }
+ 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
+ 'guest-panicked' ] }
##
# @SnapshotInfo
diff --git a/qmp.c b/qmp.c
index f4a757b..52cc623 100644
--- a/qmp.c
+++ b/qmp.c
@@ -148,7 +148,8 @@ void qmp_cont(Error **errp)
error_set(errp, QERR_MIGRATION_EXPECTED);
return;
} else if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
- runstate_check(RUN_STATE_SHUTDOWN)) {
+ runstate_check(RUN_STATE_SHUTDOWN) ||
+ runstate_check(RUN_STATE_GUEST_PANICKED)) {
error_set(errp, QERR_RESET_REQUIRED);
return;
} else if (runstate_check(RUN_STATE_SUSPENDED)) {
diff --git a/vl.c b/vl.c
index ca2e1e2..33b4578 100644
--- a/vl.c
+++ b/vl.c
@@ -373,6 +373,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
+ { RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
@@ -387,6 +388,9 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
+
{ RUN_STATE_MAX, RUN_STATE_MAX },
};
@@ -1617,7 +1621,8 @@ static bool main_loop_should_exit(void)
qemu_system_reset(VMRESET_REPORT);
resume_all_vcpus();
if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
- runstate_check(RUN_STATE_SHUTDOWN)) {
+ runstate_check(RUN_STATE_SHUTDOWN) ||
+ runstate_check(RUN_STATE_GUEST_PANICKED)) {
bdrv_iterate(iostatus_bdrv_it, NULL);
vm_start();
}
--
1.7.10.2
From: Wen Congyang <[email protected]>
Signed-off-by: Wen Congyang <[email protected]>
---
hw/pc_piix.c | 6 +++++-
qemu-config.c | 4 ++++
qemu-options.hx | 3 ++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index fb67dc1..864d356 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -149,6 +149,8 @@ static void pc_init1(MemoryRegion *system_memory,
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
void *fw_cfg = NULL;
+ QemuOptsList *list = qemu_find_opts("machine");
+ bool enable_pv_event;
pc_cpus_init(cpu_model);
@@ -287,7 +289,9 @@ static void pc_init1(MemoryRegion *system_memory,
pc_pci_device_init(pci_bus);
}
- if (kvm_enabled()) {
+ enable_pv_event = qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
+ "enable_pv_event", false);
+ if (kvm_enabled() && enable_pv_event) {
kvm_pv_event_init(isa_bus);
}
}
diff --git a/qemu-config.c b/qemu-config.c
index cd1ec21..db8f828 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -619,6 +619,10 @@ static QemuOptsList qemu_machine_opts = {
.name = "mem-merge",
.type = QEMU_OPT_BOOL,
.help = "enable/disable memory merge support",
+ }, {
+ .name = "enable_pv_event",
+ .type = QEMU_OPT_BOOL,
+ .help = "handle pv event"
},
{ /* End of list */ }
},
diff --git a/qemu-options.hx b/qemu-options.hx
index 46f0539..de667d6 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -35,7 +35,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" kernel_irqchip=on|off controls accelerated irqchip support\n"
" kvm_shadow_mem=size of KVM shadow MMU\n"
" dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
- " mem-merge=on|off controls memory merge support (default: on)\n",
+ " mem-merge=on|off controls memory merge support (default: on)\n"
+ " enable_pv_event=on|off controls pv event support\n",
QEMU_ARCH_ALL)
STEXI
@item -machine [type=]@var{name}[,prop=@var{value}[,...]]
--
1.7.10.2
If the target is x86/x86_64, the guest's kernel will write 0x01 to the
port KVM_PV_EVENT_PORT when it is panciked. This patch introduces a new
qom device kvm_pv_ioport to listen this I/O port, and deal with panicked
event according to panicked_action's value. The possible actions are:
1. emit QEVENT_GUEST_PANICKED only
2. emit QEVENT_GUEST_PANICKED and pause the guest
3. emit QEVENT_GUEST_PANICKED and poweroff the guest
4. emit QEVENT_GUEST_PANICKED and reset the guest
I/O ports does not work for some targets(for example: s390). And you
can implement another qom device, and include it's code into pv_event.c
for such target.
Note: if we emit QEVENT_GUEST_PANICKED only, and the management
application does not receive this event(the management may not
run when the event is emitted), the management won't know the
guest is panicked.
Signed-off-by: Wen Congyang <[email protected]>
Signed-off-by: Hu Tao <[email protected]>
---
hw/kvm/Makefile.objs | 2 +-
hw/kvm/pv_event.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++
hw/pc_piix.c | 5 ++
kvm-stub.c | 4 +
kvm.h | 2 +
5 files changed, 209 insertions(+), 1 deletion(-)
create mode 100644 hw/kvm/pv_event.c
diff --git a/hw/kvm/Makefile.objs b/hw/kvm/Makefile.objs
index f620d7f..cf93199 100644
--- a/hw/kvm/Makefile.objs
+++ b/hw/kvm/Makefile.objs
@@ -1 +1 @@
-obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o
+obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o pv_event.o
diff --git a/hw/kvm/pv_event.c b/hw/kvm/pv_event.c
new file mode 100644
index 0000000..112491e
--- /dev/null
+++ b/hw/kvm/pv_event.c
@@ -0,0 +1,197 @@
+/*
+ * QEMU KVM support, paravirtual event device
+ *
+ * Copyright Fujitsu, Corp. 2012
+ *
+ * Authors:
+ * Wen Congyang <[email protected]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_para.h>
+#include <asm/kvm_para.h>
+#include <qobject.h>
+#include <qjson.h>
+#include <monitor.h>
+#include <sysemu.h>
+#include <kvm.h>
+
+/* Possible values for action parameter. */
+#define PANICKED_REPORT 1 /* emit QEVENT_GUEST_PANICKED only */
+#define PANICKED_PAUSE 2 /* emit QEVENT_GUEST_PANICKED and pause VM */
+#define PANICKED_POWEROFF 3 /* emit QEVENT_GUEST_PANICKED and quit VM */
+#define PANICKED_RESET 4 /* emit QEVENT_GUEST_PANICKED and reset VM */
+
+#define PV_EVENT_DRIVER "kvm_pv_event"
+
+struct PVEventAction {
+ char *panicked_action;
+ int panicked_action_value;
+};
+
+#define DEFINE_PV_EVENT_PROPERTIES(_state, _conf) \
+ DEFINE_PROP_STRING("panicked_action", _state, _conf.panicked_action)
+
+static void panicked_mon_event(const char *action)
+{
+ QObject *data;
+
+ data = qobject_from_jsonf("{ 'action': %s }", action);
+ monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
+ qobject_decref(data);
+}
+
+static void panicked_perform_action(uint32_t panicked_action)
+{
+ switch (panicked_action) {
+ case PANICKED_REPORT:
+ panicked_mon_event("report");
+ break;
+
+ case PANICKED_PAUSE:
+ panicked_mon_event("pause");
+ vm_stop(RUN_STATE_GUEST_PANICKED);
+ break;
+
+ case PANICKED_POWEROFF:
+ panicked_mon_event("poweroff");
+ qemu_system_shutdown_request();
+ break;
+
+ case PANICKED_RESET:
+ panicked_mon_event("reset");
+ qemu_system_reset_request();
+ break;
+ }
+}
+
+static uint64_t supported_event(void)
+{
+ return 1 << KVM_PV_FEATURE_PANICKED;
+}
+
+static void handle_event(int event, struct PVEventAction *conf)
+{
+ if (event == KVM_PV_EVENT_PANICKED) {
+ panicked_perform_action(conf->panicked_action_value);
+ }
+}
+
+static int pv_event_init(struct PVEventAction *conf)
+{
+ if (!conf->panicked_action) {
+ conf->panicked_action_value = PANICKED_REPORT;
+ } else if (strcasecmp(conf->panicked_action, "none") == 0) {
+ conf->panicked_action_value = PANICKED_REPORT;
+ } else if (strcasecmp(conf->panicked_action, "pause") == 0) {
+ conf->panicked_action_value = PANICKED_PAUSE;
+ } else if (strcasecmp(conf->panicked_action, "poweroff") == 0) {
+ conf->panicked_action_value = PANICKED_POWEROFF;
+ } else if (strcasecmp(conf->panicked_action, "reset") == 0) {
+ conf->panicked_action_value = PANICKED_RESET;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+#if defined(KVM_PV_EVENT_PORT)
+
+#include "hw/isa.h"
+
+typedef struct {
+ ISADevice dev;
+ struct PVEventAction conf;
+ MemoryRegion ioport;
+} PVIOPortState;
+
+static uint64_t pv_io_read(void *opaque, hwaddr addr, unsigned size)
+{
+ return supported_event();
+}
+
+static void pv_io_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned size)
+{
+ PVIOPortState *s = opaque;
+
+ handle_event(val, &s->conf);
+}
+
+static const MemoryRegionOps pv_io_ops = {
+ .read = pv_io_read,
+ .write = pv_io_write,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static int pv_ioport_initfn(ISADevice *dev)
+{
+ PVIOPortState *s = DO_UPCAST(PVIOPortState, dev, dev);
+
+ if (pv_event_init(&s->conf) < 0) {
+ return -1;
+ }
+
+ memory_region_init_io(&s->ioport, &pv_io_ops, s, "pv_event", 1);
+ isa_register_ioport(dev, &s->ioport, KVM_PV_EVENT_PORT);
+
+ return 0;
+}
+
+static Property pv_ioport_properties[] = {
+ DEFINE_PV_EVENT_PROPERTIES(PVIOPortState, conf),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pv_ioport_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ISADeviceClass *ic = ISA_DEVICE_CLASS(klass);
+
+ ic->init = pv_ioport_initfn;
+ dc->no_user = 1;
+ dc->props = pv_ioport_properties;
+}
+
+static TypeInfo pv_ioport_info = {
+ .name = PV_EVENT_DRIVER,
+ .parent = TYPE_ISA_DEVICE,
+ .instance_size = sizeof(PVIOPortState),
+ .class_init = pv_ioport_class_init,
+};
+
+static void pv_ioport_register_types(void)
+{
+ type_register_static(&pv_ioport_info);
+}
+
+type_init(pv_ioport_register_types)
+
+void kvm_pv_event_init(void *opaque)
+{
+ ISABus *bus = opaque;
+ ISADevice *dev;
+
+ dev = isa_create(bus, PV_EVENT_DRIVER);
+ qdev_init_nofail(&dev->qdev);
+}
+
+#else
+
+/*
+ * If you need a qom device to handle pv event, this device should
+ * be created and initialized in kvm_pv_event_init().
+ *
+ * The parameter opaque is the device's parent bus.
+ */
+void kvm_pv_event_init(void *opaque)
+{
+}
+#endif
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 47ebc1a..fb67dc1 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -46,6 +46,7 @@
#ifdef CONFIG_XEN
# include <xen/hvm/hvm_info_table.h>
#endif
+#include <linux/kvm_para.h>
#define MAX_IDE_BUS 2
@@ -285,6 +286,10 @@ static void pc_init1(MemoryRegion *system_memory,
if (pci_enabled) {
pc_pci_device_init(pci_bus);
}
+
+ if (kvm_enabled()) {
+ kvm_pv_event_init(isa_bus);
+ }
}
static void pc_init_pci(QEMUMachineInitArgs *args)
diff --git a/kvm-stub.c b/kvm-stub.c
index a3455e2..c9cfe4a 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -140,3 +140,7 @@ int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
{
return -ENOSYS;
}
+
+void kvm_pv_event_init(void *opaque)
+{
+}
diff --git a/kvm.h b/kvm.h
index 2b26dcb..3771b12 100644
--- a/kvm.h
+++ b/kvm.h
@@ -274,4 +274,6 @@ void kvm_irqchip_release_virq(KVMState *s, int virq);
int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
+
+void kvm_pv_event_init(void *opaque);
#endif
--
1.7.10.2
This event will be emited when the guest is panicked.
Signed-off-by: Wen Congyang <[email protected]>
---
monitor.c | 1 +
monitor.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/monitor.c b/monitor.c
index d17ae2d..d2e4bbf 100644
--- a/monitor.c
+++ b/monitor.c
@@ -457,6 +457,7 @@ static const char *monitor_event_names[] = {
[QEVENT_WAKEUP] = "WAKEUP",
[QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE",
[QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
+ [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
};
QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
diff --git a/monitor.h b/monitor.h
index b6e7d95..3962340 100644
--- a/monitor.h
+++ b/monitor.h
@@ -45,6 +45,7 @@ typedef enum MonitorEvent {
QEVENT_WAKEUP,
QEVENT_BALLOON_CHANGE,
QEVENT_SPICE_MIGRATE_COMPLETED,
+ QEVENT_GUEST_PANICKED,
/* Add to 'monitor_event_names' array in monitor.c when
* defining new events here */
--
1.7.10.2
From: Wen Congyang <[email protected]>
The guest should run after reseting it, but it does not run if its
old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED.
We don't set runstate to RUN_STATE_PAUSED when reseting the guest,
so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or
RUN_STATE_PAUSED to RUN_STATE_RUNNING(not RUN_STATE_PAUSED).
Signed-off-by: Wen Congyang <[email protected]>
---
block.h | 2 ++
qmp.c | 2 +-
vl.c | 7 ++++---
3 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/block.h b/block.h
index e2d89d7..df10e30 100644
--- a/block.h
+++ b/block.h
@@ -360,6 +360,8 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_set_in_use(BlockDriverState *bs, int in_use);
int bdrv_in_use(BlockDriverState *bs);
+void iostatus_bdrv_it(void *opaque, BlockDriverState *bs);
+
enum BlockAcctType {
BDRV_ACCT_READ,
BDRV_ACCT_WRITE,
diff --git a/qmp.c b/qmp.c
index 36c54c5..f4a757b 100644
--- a/qmp.c
+++ b/qmp.c
@@ -125,7 +125,7 @@ SpiceInfo *qmp_query_spice(Error **errp)
};
#endif
-static void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
+void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
{
bdrv_iostatus_reset(bs);
}
diff --git a/vl.c b/vl.c
index ee3c43a..ca2e1e2 100644
--- a/vl.c
+++ b/vl.c
@@ -343,7 +343,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
- { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
+ { RUN_STATE_INTERNAL_ERROR, RUN_STATE_RUNNING },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
@@ -376,7 +376,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
- { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
+ { RUN_STATE_SHUTDOWN, RUN_STATE_RUNNING },
{ RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
@@ -1618,7 +1618,8 @@ static bool main_loop_should_exit(void)
resume_all_vcpus();
if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
runstate_check(RUN_STATE_SHUTDOWN)) {
- runstate_set(RUN_STATE_PAUSED);
+ bdrv_iterate(iostatus_bdrv_it, NULL);
+ vm_start();
}
}
if (qemu_wakeup_requested()) {
--
1.7.10.2
On Thu, Oct 25, 2012 at 11:42:32AM +0800, Hu Tao wrote:
> We can know the guest is panicked when the guest runs on xen.
> But we do not have such feature on kvm.
>
> Another purpose of this feature is: management app(for example:
> libvirt) can do auto dump when the guest is panicked. If management
> app does not do auto dump, the guest's user can do dump by hand if
> he sees the guest is panicked.
>
> We have three solutions to implement this feature:
> 1. use vmcall
> 2. use I/O port
> 3. use virtio-serial.
>
> We have decided to avoid touching hypervisor. The reason why I choose
> choose the I/O port is:
> 1. it is easier to implememt
> 2. it does not depend any virtual device
> 3. it can work when starting the kernel
It has been asked earlier why a simple virtio device is not usable
for this (with no response IIRC).
Also, there is no high level documentation: purpose of the interface,
how a management application should use it, etc.
At 10/31/2012 09:12 AM, Marcelo Tosatti Wrote:
> On Thu, Oct 25, 2012 at 11:42:32AM +0800, Hu Tao wrote:
>> We can know the guest is panicked when the guest runs on xen.
>> But we do not have such feature on kvm.
>>
>> Another purpose of this feature is: management app(for example:
>> libvirt) can do auto dump when the guest is panicked. If management
>> app does not do auto dump, the guest's user can do dump by hand if
>> he sees the guest is panicked.
>>
>> We have three solutions to implement this feature:
>> 1. use vmcall
>> 2. use I/O port
>> 3. use virtio-serial.
>>
>> We have decided to avoid touching hypervisor. The reason why I choose
>> choose the I/O port is:
>> 1. it is easier to implememt
>> 2. it does not depend any virtual device
>> 3. it can work when starting the kernel
>
> It has been asked earlier why a simple virtio device is not usable
> for this (with no response IIRC).
1. We can't use virtio device when the kernel is booting.
2. The virtio's driver can be built as a module, and if it is not loaded
and the kernel is panicked, there is no way to notify the host.
3. I/O port is more reliable than virtio device.
If virtio's driver has some bug, and it cause kernel panicked, we can't
use it. The I/O port is more reliable because it only depends on notifier
chain(If we use virtio device, it also depends on notifier chain).
Thanks
Wen Congyang
>
> Also, there is no high level documentation: purpose of the interface,
> how a management application should use it, etc.
>
>
On Tue, Oct 30, 2012 at 9:48 PM, Wen Congyang <[email protected]> wrote:
> At 10/31/2012 09:12 AM, Marcelo Tosatti Wrote:
>> It has been asked earlier why a simple virtio device is not usable
>> for this (with no response IIRC).
>
> 1. We can't use virtio device when the kernel is booting.
So the issue here is the small window between the point the guest
becomes "self aware" and to the point virtio drivers are loaded,
right?
I agree that if something happens during that interval, a
"virtio-notifier" driver won't catch that, but anything beyond that is
better done with a virtio driver, so how is the generic infrastructure
added in this patch useful to anything beyond detecting panics in that
initial interval?
> 2. The virtio's driver can be built as a module, and if it is not loaded
> and the kernel is panicked, there is no way to notify the host.
Even if the suggested virtio-notifier driver is built as a module, it
would get auto-loaded when the guest is booting, so I'm not sure about
this point?
> 3. I/O port is more reliable than virtio device.
> If virtio's driver has some bug, and it cause kernel panicked, we can't
> use it. The I/O port is more reliable because it only depends on notifier
> chain(If we use virtio device, it also depends on notifier chain).
This is like suggesting that we let KVM emulate virtio-blk on it's
own, parallel to the virtio implementation, so that even if there's a
problem with virtio-blk, KVM can emulate a virtio-blk on it's own.
Furthermore, why stop at virtio? What if the KVM code has a bug and it
doesn't pass IO properly? Or the x86 code? we still want panic
notifications if that happens...
Thanks,
Sasha
On Tue, Oct 30, 2012 at 10:30:02PM -0400, Sasha Levin wrote:
> On Tue, Oct 30, 2012 at 9:48 PM, Wen Congyang <[email protected]> wrote:
> > At 10/31/2012 09:12 AM, Marcelo Tosatti Wrote:
> >> It has been asked earlier why a simple virtio device is not usable
> >> for this (with no response IIRC).
> >
> > 1. We can't use virtio device when the kernel is booting.
>
> So the issue here is the small window between the point the guest
> becomes "self aware" and to the point virtio drivers are loaded,
> right?
>
> I agree that if something happens during that interval, a
> "virtio-notifier" driver won't catch that, but anything beyond that is
> better done with a virtio driver, so how is the generic infrastructure
> added in this patch useful to anything beyond detecting panics in that
> initial interval?
Asked earlier about quantification of panics in that window (i doubt
early panics are that significant for this usecase). netconsole has
the same issue:
"This module logs kernel printk messages over UDP allowing debugging of
problem where disk logging fails and serial consoles are impractical.
It can be used either built-in or as a module. As a built-in,
netconsole initializes immediately after NIC cards and will bring up
the specified interface as soon as possible. While this doesn't allow
capture of early kernel panics, it does capture most of the boot
process."
> > 2. The virtio's driver can be built as a module, and if it is not loaded
> > and the kernel is panicked, there is no way to notify the host.
>
> Even if the suggested virtio-notifier driver is built as a module, it
> would get auto-loaded when the guest is booting, so I'm not sure about
> this point?
> > 3. I/O port is more reliable than virtio device.
> > If virtio's driver has some bug, and it cause kernel panicked, we can't
> > use it. The I/O port is more reliable because it only depends on notifier
> > chain(If we use virtio device, it also depends on notifier chain).
>
> This is like suggesting that we let KVM emulate virtio-blk on it's
> own, parallel to the virtio implementation, so that even if there's a
> problem with virtio-blk, KVM can emulate a virtio-blk on it's own.
>
> Furthermore, why stop at virtio? What if the KVM code has a bug and it
> doesn't pass IO properly? Or the x86 code? we still want panic
> notifications if that happens...
On Tue, Oct 30, 2012 at 10:30:02PM -0400, Sasha Levin wrote:
> On Tue, Oct 30, 2012 at 9:48 PM, Wen Congyang <[email protected]> wrote:
> > At 10/31/2012 09:12 AM, Marcelo Tosatti Wrote:
> >> It has been asked earlier why a simple virtio device is not usable
> >> for this (with no response IIRC).
> >
> > 1. We can't use virtio device when the kernel is booting.
>
> So the issue here is the small window between the point the guest
> becomes "self aware" and to the point virtio drivers are loaded,
> right?
>
> I agree that if something happens during that interval, a
> "virtio-notifier" driver won't catch that, but anything beyond that is
> better done with a virtio driver, so how is the generic infrastructure
> added in this patch useful to anything beyond detecting panics in that
> initial interval?
Another point is dependency. To make panic notification more reliable,
we have to reduce its dependency on other parts of kernel as possible as
we can.
>
> > 2. The virtio's driver can be built as a module, and if it is not loaded
> > and the kernel is panicked, there is no way to notify the host.
>
> Even if the suggested virtio-notifier driver is built as a module, it
> would get auto-loaded when the guest is booting, so I'm not sure about
> this point?
>
> > 3. I/O port is more reliable than virtio device.
> > If virtio's driver has some bug, and it cause kernel panicked, we can't
> > use it. The I/O port is more reliable because it only depends on notifier
> > chain(If we use virtio device, it also depends on notifier chain).
>
> This is like suggesting that we let KVM emulate virtio-blk on it's
> own, parallel to the virtio implementation, so that even if there's a
> problem with virtio-blk, KVM can emulate a virtio-blk on it's own.
Not the same. On virtio-blk, if we can make use of virtio, why not? If
there is a problem of virtio-blk but caused by virtio itself, just fix
it in virtio.
But in the case of panic notification, more dependency means more
chances of failure of panic notification. Say, if we use a virtio device
to do panic notification, then we will fail if: virtio itself has
problems, virtio for some reason can't be deployed(neither built-in or
as a module), or guest doesn't support virtio, etc.
We choose IO because compared to virtio device, it is not that heavy and
less problematic.
>
> Furthermore, why stop at virtio? What if the KVM code has a bug and it
> doesn't pass IO properly? Or the x86 code? we still want panic
> notifications if that happens...
Better ideas are welcome.
--
Thanks,
Hu Tao
On Mon, Nov 5, 2012 at 8:58 PM, Hu Tao <[email protected]> wrote:
> But in the case of panic notification, more dependency means more
> chances of failure of panic notification. Say, if we use a virtio device
> to do panic notification, then we will fail if: virtio itself has
> problems, virtio for some reason can't be deployed(neither built-in or
> as a module), or guest doesn't support virtio, etc.
Add polling to your virtio device. If it didn't notify of a panic but
taking more than 20 sec to answer your poll request you can assume
it's dead.
Actually, just use virtio-serial and something in userspace on the guest.
> We choose IO because compared to virtio device, it is not that heavy and
> less problematic.
Less problematic? Heavy? Are there any known issues with virtio that
should be fixed? You make virtio sound like an old IDE drive or
something.
Thanks,
Sasha
On Fri, Nov 09, 2012 at 03:17:39PM -0500, Sasha Levin wrote:
> On Mon, Nov 5, 2012 at 8:58 PM, Hu Tao <[email protected]> wrote:
> > But in the case of panic notification, more dependency means more
> > chances of failure of panic notification. Say, if we use a virtio device
> > to do panic notification, then we will fail if: virtio itself has
> > problems, virtio for some reason can't be deployed(neither built-in or
> > as a module), or guest doesn't support virtio, etc.
>
> Add polling to your virtio device. If it didn't notify of a panic but
> taking more than 20 sec to answer your poll request you can assume
> it's dead.
>
> Actually, just use virtio-serial and something in userspace on the guest.
They want the guest to stop, so a memory dump can be taken by management
interface.
Hu Tao, lets assume port I/O is the preferred method for communication.
Now, the following comments have still not been addressed:
1) Lifecycle of the stopped guest and interaction with other stopped
states in QEMU.
2) Format of the interface for other architectures (you can choose
a different KVM supported architecture and write an example).
3) Clear/documented management interface for the feature.
Hi Marcelo,
On Tue, Nov 13, 2012 at 12:19:08AM -0200, Marcelo Tosatti wrote:
> On Fri, Nov 09, 2012 at 03:17:39PM -0500, Sasha Levin wrote:
> > On Mon, Nov 5, 2012 at 8:58 PM, Hu Tao <[email protected]> wrote:
> > > But in the case of panic notification, more dependency means more
> > > chances of failure of panic notification. Say, if we use a virtio device
> > > to do panic notification, then we will fail if: virtio itself has
> > > problems, virtio for some reason can't be deployed(neither built-in or
> > > as a module), or guest doesn't support virtio, etc.
> >
> > Add polling to your virtio device. If it didn't notify of a panic but
> > taking more than 20 sec to answer your poll request you can assume
> > it's dead.
> >
> > Actually, just use virtio-serial and something in userspace on the guest.
>
> They want the guest to stop, so a memory dump can be taken by management
> interface.
>
> Hu Tao, lets assume port I/O is the preferred method for communication.
Okey.
> Now, the following comments have still not been addressed:
>
> 1) Lifecycle of the stopped guest and interaction with other stopped
> states in QEMU.
Patch 3 already deals with run state transitions. But in case I'm
missing something, could you be more specific?
>
> 2) Format of the interface for other architectures (you can choose
> a different KVM supported architecture and write an example).
>
> 3) Clear/documented management interface for the feature.
It is documented in patch 0: Documentation/virtual/kvm/pv_event.txt.
Does it need to be improved?
On Tue, Nov 20, 2012 at 06:09:48PM +0800, Hu Tao wrote:
> Hi Marcelo,
>
> On Tue, Nov 13, 2012 at 12:19:08AM -0200, Marcelo Tosatti wrote:
> > On Fri, Nov 09, 2012 at 03:17:39PM -0500, Sasha Levin wrote:
> > > On Mon, Nov 5, 2012 at 8:58 PM, Hu Tao <[email protected]> wrote:
> > > > But in the case of panic notification, more dependency means more
> > > > chances of failure of panic notification. Say, if we use a virtio device
> > > > to do panic notification, then we will fail if: virtio itself has
> > > > problems, virtio for some reason can't be deployed(neither built-in or
> > > > as a module), or guest doesn't support virtio, etc.
> > >
> > > Add polling to your virtio device. If it didn't notify of a panic but
> > > taking more than 20 sec to answer your poll request you can assume
> > > it's dead.
> > >
> > > Actually, just use virtio-serial and something in userspace on the guest.
> >
> > They want the guest to stop, so a memory dump can be taken by management
> > interface.
> >
> > Hu Tao, lets assume port I/O is the preferred method for communication.
>
> Okey.
>
> > Now, the following comments have still not been addressed:
> >
> > 1) Lifecycle of the stopped guest and interaction with other stopped
> > states in QEMU.
>
> Patch 3 already deals with run state transitions. But in case I'm
> missing something, could you be more specific?
- What are the possibilities during migration? Say:
- migration starts.
- guest panics.
- migration starts vm on other side?
- Guest stopped due to EIO.
- guest vcpuN panics, VMEXIT but still outside QEMU.
- QEMU EIO error, stop vm.
- guest vcpuN completes, processes IO exit.
- system_reset due to panic.
- Add all possibilities that should be verified (that is, interaction
of this feature with other stopped states in QEMU).
---
- What happens if the guest has reboot-on-panic configured? Does it take
precedence over hypervisor notification?
Out of curiosity, does kexec support memory dumping?
> > 2) Format of the interface for other architectures (you can choose
> > a different KVM supported architecture and write an example).
> >
> > 3) Clear/documented management interface for the feature.
>
> It is documented in patch 0: Documentation/virtual/kvm/pv_event.txt.
> Does it need to be improved?
This is documentation for the host<->guest interface. There is no
documentation on the interface for management.
On Tue, Nov 20, 2012 at 07:33:49PM -0200, Marcelo Tosatti wrote:
> On Tue, Nov 20, 2012 at 06:09:48PM +0800, Hu Tao wrote:
> > Hi Marcelo,
> >
> > On Tue, Nov 13, 2012 at 12:19:08AM -0200, Marcelo Tosatti wrote:
> > > On Fri, Nov 09, 2012 at 03:17:39PM -0500, Sasha Levin wrote:
> > > > On Mon, Nov 5, 2012 at 8:58 PM, Hu Tao <[email protected]> wrote:
> > > > > But in the case of panic notification, more dependency means more
> > > > > chances of failure of panic notification. Say, if we use a virtio device
> > > > > to do panic notification, then we will fail if: virtio itself has
> > > > > problems, virtio for some reason can't be deployed(neither built-in or
> > > > > as a module), or guest doesn't support virtio, etc.
> > > >
> > > > Add polling to your virtio device. If it didn't notify of a panic but
> > > > taking more than 20 sec to answer your poll request you can assume
> > > > it's dead.
> > > >
> > > > Actually, just use virtio-serial and something in userspace on the guest.
> > >
> > > They want the guest to stop, so a memory dump can be taken by management
> > > interface.
> > >
> > > Hu Tao, lets assume port I/O is the preferred method for communication.
> >
> > Okey.
> >
> > > Now, the following comments have still not been addressed:
> > >
> > > 1) Lifecycle of the stopped guest and interaction with other stopped
> > > states in QEMU.
> >
> > Patch 3 already deals with run state transitions. But in case I'm
> > missing something, could you be more specific?
>
> - What are the possibilities during migration? Say:
> - migration starts.
> - guest panics.
> - migration starts vm on other side?
> - Guest stopped due to EIO.
> - guest vcpuN panics, VMEXIT but still outside QEMU.
> - QEMU EIO error, stop vm.
> - guest vcpuN completes, processes IO exit.
> - system_reset due to panic.
> - Add all possibilities that should be verified (that is, interaction
> of this feature with other stopped states in QEMU).
>
BTW I do remember getting asserts while using breakpoints via gdbstub
and stop/cont from the monitor.
> ---
>
> - What happens if the guest has reboot-on-panic configured? Does it take
> precedence over hypervisor notification?
>
>
>
> Out of curiosity, does kexec support memory dumping?
>
> > > 2) Format of the interface for other architectures (you can choose
> > > a different KVM supported architecture and write an example).
> > >
> > > 3) Clear/documented management interface for the feature.
> >
> > It is documented in patch 0: Documentation/virtual/kvm/pv_event.txt.
> > Does it need to be improved?
>
> This is documentation for the host<->guest interface. There is no
> documentation on the interface for management.
--
Gleb.