When a guest runs on Xen, we can tell that the guest has panicked.
But we do not have such a feature on KVM.
Another purpose of this feature is to let a management application (for
example, libvirt) dump the guest automatically when it panics. If the
management application does not do an automatic dump, the guest's user
can take a dump by hand after seeing that the guest has panicked.
We have three solutions to implement this feature:
1. use vmcall
2. use I/O port
3. use virtio-serial.
We have decided to avoid touching the hypervisor. The reasons why I chose
the I/O port are:
1. it is easier to implement
2. it does not depend on any virtual device
3. it can work while the kernel is starting
Signed-off-by: Wen Congyang <[email protected]>
---
Documentation/virtual/kvm/pv_event.txt | 32 ++++++++++++++++++++++++++++++++
arch/ia64/include/asm/kvm_para.h | 14 ++++++++++++++
arch/powerpc/include/asm/kvm_para.h | 14 ++++++++++++++
arch/s390/include/asm/kvm_para.h | 14 ++++++++++++++
arch/x86/include/asm/kvm_para.h | 27 +++++++++++++++++++++++++++
arch/x86/kernel/kvm.c | 25 +++++++++++++++++++++++++
include/linux/kvm_para.h | 23 +++++++++++++++++++++++
7 files changed, 149 insertions(+), 0 deletions(-)
create mode 100644 Documentation/virtual/kvm/pv_event.txt
diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
new file mode 100644
index 0000000..bb04de0
--- /dev/null
+++ b/Documentation/virtual/kvm/pv_event.txt
@@ -0,0 +1,32 @@
+The KVM paravirtual event interface
+=================================
+
+Initializing the paravirtual event interface
+======================
+kvm_pv_event_init()
+Arguments:
+ None
+
+Return Value:
+ 0: The guest kernel can use paravirtual event interface.
+ 1: The guest kernel can't use paravirtual event interface.
+
+Querying whether the event can be ejected
+======================
+kvm_pv_has_feature()
+Arguments:
+ feature: The bit value of this paravirtual event to query
+
+Return Value:
+ 0 : The guest kernel can't eject this paravirtual event.
+ 1 : The guest kernel can eject this paravirtual event.
+
+
+Ejecting paravirtual event
+======================
+kvm_pv_eject_event()
+Arguments:
+ event: The event to be ejected.
+
+Return Value:
+ None
diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
index 2019cb9..b5ec658 100644
--- a/arch/ia64/include/asm/kvm_para.h
+++ b/arch/ia64/include/asm/kvm_para.h
@@ -31,6 +31,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
return false;
}
+static inline int kvm_arch_pv_event_init(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
#endif
#endif
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index c18916b..01b98c7 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -211,6 +211,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
return false;
}
+static inline int kvm_arch_pv_event_init(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
#endif /* __KERNEL__ */
#endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index da44867..00ce058 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -154,6 +154,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
return false;
}
+static inline int kvm_arch_pv_event_init(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ return 0;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+}
+
#endif
#endif /* __S390_KVM_PARA_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 2f7712e..7d297f0 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
#define KVM_PV_EOI_DISABLED 0x0
+#define KVM_PV_EVENT_PORT (0x505UL)
+
#ifdef __KERNEL__
#include <asm/processor.h>
+#include <linux/ioport.h>
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
@@ -228,6 +231,30 @@ static inline void kvm_disable_steal_time(void)
}
#endif
+static inline int kvm_arch_pv_event_init(void)
+{
+ if (!request_region(KVM_PV_EVENT_PORT, 1, "KVM_PV_EVENT"))
+ return -1;
+
+ return 0;
+}
+
+static inline unsigned int kvm_arch_pv_features(void)
+{
+ unsigned int features = inl(KVM_PV_EVENT_PORT);
+
+ /* Reading from an invalid I/O port will return -1 */
+ if (features == ~0)
+ features = 0;
+
+ return features;
+}
+
+static inline void kvm_arch_pv_eject_event(unsigned int event)
+{
+ outl(event, KVM_PV_EVENT_PORT);
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c1d61ee..6129459 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -368,6 +368,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
.notifier_call = kvm_pv_reboot_notify,
};
+static int
+kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
+{
+ kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_panic_nb = {
+ .notifier_call = kvm_pv_panic_notify,
+};
+
static u64 kvm_steal_clock(int cpu)
{
u64 steal;
@@ -447,6 +458,20 @@ static void __init kvm_apf_trap_init(void)
set_intr_gate(14, &async_page_fault);
}
+static void __init kvm_pv_panicked_event_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ if (kvm_pv_event_init())
+ return;
+
+ if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kvm_pv_panic_nb);
+}
+arch_initcall(kvm_pv_panicked_event_init);
+
void __init kvm_guest_init(void)
{
int i;
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index ff476dd..8e0fb81 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -20,6 +20,12 @@
#define KVM_HC_FEATURES 3
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
+/* The bit of supported pv event */
+#define KVM_PV_FEATURE_PANICKED 0
+
+/* The pv event value */
+#define KVM_PV_EVENT_PANICKED 1
+
/*
* hypercalls use architecture specific
*/
@@ -33,5 +39,22 @@ static inline int kvm_para_has_feature(unsigned int feature)
return 1;
return 0;
}
+
+static inline int kvm_pv_event_init(void)
+{
+ return kvm_arch_pv_event_init();
+}
+
+static inline int kvm_pv_has_feature(unsigned int feature)
+{
+ if (kvm_arch_pv_features() & (1UL << feature))
+ return 1;
+ return 0;
+}
+
+static inline void kvm_pv_eject_event(unsigned int event)
+{
+ kvm_arch_pv_eject_event(event);
+}
#endif /* __KERNEL__ */
#endif /* __LINUX_KVM_PARA_H */
--
1.7.1
The guest should run after resetting it, but it does not run if its
old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED.
We don't set the runstate to RUN_STATE_PAUSED when resetting the guest,
so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or
RUN_STATE_PAUSED to RUN_STATE_RUNNING (not RUN_STATE_PAUSED).
Signed-off-by: Wen Congyang <[email protected]>
---
block.h | 2 ++
qmp.c | 2 +-
vl.c | 7 ++++---
3 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/block.h b/block.h
index 2e2be11..c3265c2 100644
--- a/block.h
+++ b/block.h
@@ -339,6 +339,8 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_set_in_use(BlockDriverState *bs, int in_use);
int bdrv_in_use(BlockDriverState *bs);
+void iostatus_bdrv_it(void *opaque, BlockDriverState *bs);
+
enum BlockAcctType {
BDRV_ACCT_READ,
BDRV_ACCT_WRITE,
diff --git a/qmp.c b/qmp.c
index 8463922..c5a20f1 100644
--- a/qmp.c
+++ b/qmp.c
@@ -125,7 +125,7 @@ SpiceInfo *qmp_query_spice(Error **errp)
};
#endif
-static void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
+void iostatus_bdrv_it(void *opaque, BlockDriverState *bs)
{
bdrv_iostatus_reset(bs);
}
diff --git a/vl.c b/vl.c
index 7c577fa..316a977 100644
--- a/vl.c
+++ b/vl.c
@@ -343,7 +343,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
- { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
+ { RUN_STATE_INTERNAL_ERROR, RUN_STATE_RUNNING },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
@@ -376,7 +376,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
- { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
+ { RUN_STATE_SHUTDOWN, RUN_STATE_RUNNING },
{ RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
@@ -1608,7 +1608,8 @@ static bool main_loop_should_exit(void)
resume_all_vcpus();
if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
runstate_check(RUN_STATE_SHUTDOWN)) {
- runstate_set(RUN_STATE_PAUSED);
+ bdrv_iterate(iostatus_bdrv_it, NULL);
+ vm_start();
}
}
if (qemu_wakeup_requested()) {
--
1.7.1
Corresponding kvm.git hash: 1d92128f with my patch for kvm
---
linux-headers/asm-s390/kvm.h | 2 +-
linux-headers/asm-s390/kvm_para.h | 2 +-
linux-headers/asm-x86/kvm.h | 1 +
linux-headers/asm-x86/kvm_para.h | 9 +++++++++
linux-headers/linux/kvm.h | 3 +++
linux-headers/linux/kvm_para.h | 6 ++++++
6 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h
index bdcbe0f..d25da59 100644
--- a/linux-headers/asm-s390/kvm.h
+++ b/linux-headers/asm-s390/kvm.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_KVM_S390_H
#define __LINUX_KVM_S390_H
/*
- * asm-s390/kvm.h - KVM s390 specific structures and definitions
+ * KVM s390 specific structures and definitions
*
* Copyright IBM Corp. 2008
*
diff --git a/linux-headers/asm-s390/kvm_para.h b/linux-headers/asm-s390/kvm_para.h
index 8e2dd67..870051f 100644
--- a/linux-headers/asm-s390/kvm_para.h
+++ b/linux-headers/asm-s390/kvm_para.h
@@ -1,5 +1,5 @@
/*
- * asm-s390/kvm_para.h - definition for paravirtual devices on s390
+ * definition for paravirtual devices on s390
*
* Copyright IBM Corp. 2008
*
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index e7d1c19..246617e 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -12,6 +12,7 @@
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
index f2ac46a..53aca59 100644
--- a/linux-headers/asm-x86/kvm_para.h
+++ b/linux-headers/asm-x86/kvm_para.h
@@ -22,6 +22,7 @@
#define KVM_FEATURE_CLOCKSOURCE2 3
#define KVM_FEATURE_ASYNC_PF 4
#define KVM_FEATURE_STEAL_TIME 5
+#define KVM_FEATURE_PV_EOI 6
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -37,6 +38,7 @@
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
+#define MSR_KVM_PV_EOI_EN 0x4b564d04
struct kvm_steal_time {
__u64 steal;
@@ -89,5 +91,12 @@ struct kvm_vcpu_pv_apf_data {
__u32 enabled;
};
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
+#define KVM_PV_EVENT_PORT (0x505UL)
+
#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 5a9d4e3..4b9e575 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -617,6 +617,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_SIGNAL_MSI 77
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
+#define KVM_CAP_PPC_ALLOC_HTAB 80
#ifdef KVM_CAP_IRQ_ROUTING
@@ -828,6 +829,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
+/* Available with KVM_CAP_PPC_ALLOC_HTAB */
+#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
/*
* ioctls for vcpu fds
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
index 7bdcf93..f6be0bb 100644
--- a/linux-headers/linux/kvm_para.h
+++ b/linux-headers/linux/kvm_para.h
@@ -20,6 +20,12 @@
#define KVM_HC_FEATURES 3
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
+/* The bit of supported pv event */
+#define KVM_PV_FEATURE_PANICKED 0
+
+/* The pv event value */
+#define KVM_PV_EVENT_PANICKED 1
+
/*
* hypercalls use architecture specific
*/
--
1.7.1
The guest will be in this state when it is panicked.
Signed-off-by: Wen Congyang <[email protected]>
---
qapi-schema.json | 6 +++++-
qmp.c | 3 ++-
vl.c | 7 ++++++-
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/qapi-schema.json b/qapi-schema.json
index bd8ad74..edb090a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -149,11 +149,15 @@
# @suspended: guest is suspended (ACPI S3)
#
# @watchdog: the watchdog action is configured to pause and has been triggered
+#
+# @guest-panicked: the panicked action is configured to pause and has been
+# triggered.
##
{ 'enum': 'RunState',
'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
- 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] }
+ 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
+ 'guest-panicked' ] }
##
# @StatusInfo:
diff --git a/qmp.c b/qmp.c
index c5a20f1..f863f56 100644
--- a/qmp.c
+++ b/qmp.c
@@ -148,7 +148,8 @@ void qmp_cont(Error **errp)
error_set(errp, QERR_MIGRATION_EXPECTED);
return;
} else if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
- runstate_check(RUN_STATE_SHUTDOWN)) {
+ runstate_check(RUN_STATE_SHUTDOWN) ||
+ runstate_check(RUN_STATE_GUEST_PANICKED)) {
error_set(errp, QERR_RESET_REQUIRED);
return;
} else if (runstate_check(RUN_STATE_SUSPENDED)) {
diff --git a/vl.c b/vl.c
index 316a977..947b23a 100644
--- a/vl.c
+++ b/vl.c
@@ -373,6 +373,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
+ { RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
@@ -387,6 +388,9 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
+
{ RUN_STATE_MAX, RUN_STATE_MAX },
};
@@ -1607,7 +1611,8 @@ static bool main_loop_should_exit(void)
qemu_system_reset(VMRESET_REPORT);
resume_all_vcpus();
if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
- runstate_check(RUN_STATE_SHUTDOWN)) {
+ runstate_check(RUN_STATE_SHUTDOWN) ||
+ runstate_check(RUN_STATE_GUEST_PANICKED)) {
bdrv_iterate(iostatus_bdrv_it, NULL);
vm_start();
}
--
1.7.1
This event will be emitted when the guest has panicked.
Signed-off-by: Wen Congyang <[email protected]>
---
monitor.c | 1 +
monitor.h | 1 +
2 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/monitor.c b/monitor.c
index b17b1bb..f74dd2d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -455,6 +455,7 @@ static const char *monitor_event_names[] = {
[QEVENT_SUSPEND_DISK] = "SUSPEND_DISK",
[QEVENT_WAKEUP] = "WAKEUP",
[QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE",
+ [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
};
QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
diff --git a/monitor.h b/monitor.h
index 47d556b..f48a502 100644
--- a/monitor.h
+++ b/monitor.h
@@ -43,6 +43,7 @@ typedef enum MonitorEvent {
QEVENT_SUSPEND_DISK,
QEVENT_WAKEUP,
QEVENT_BALLOON_CHANGE,
+ QEVENT_GUEST_PANICKED,
/* Add to 'monitor_event_names' array in monitor.c when
* defining new events here */
--
1.7.1
If the target is x86/x86_64, the guest's kernel will write 0x01 to the
port KVM_PV_EVENT_PORT when it has panicked. This patch introduces a new
qom device kvm_pv_ioport to listen on this I/O port, and to deal with the
panicked event according to panicked_action's value. The possible actions are:
1. emit QEVENT_GUEST_PANICKED only
2. emit QEVENT_GUEST_PANICKED and pause the guest
3. emit QEVENT_GUEST_PANICKED and poweroff the guest
4. emit QEVENT_GUEST_PANICKED and reset the guest
I/O ports do not work for some targets (for example: s390). For such a
target, you can implement another qom device and include its code in
pv_event.c.
Note: if we emit QEVENT_GUEST_PANICKED only, and the management
application does not receive this event (the management application may
not be running when the event is emitted), the management application
won't know that the guest has panicked.
Signed-off-by: Wen Congyang <[email protected]>
---
hw/kvm/Makefile.objs | 2 +-
hw/kvm/pv_event.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++
hw/pc_piix.c | 5 ++
kvm-stub.c | 4 +
kvm.h | 2 +
5 files changed, 209 insertions(+), 1 deletions(-)
create mode 100644 hw/kvm/pv_event.c
diff --git a/hw/kvm/Makefile.objs b/hw/kvm/Makefile.objs
index 226497a..23e3b30 100644
--- a/hw/kvm/Makefile.objs
+++ b/hw/kvm/Makefile.objs
@@ -1 +1 @@
-obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o
+obj-$(CONFIG_KVM) += clock.o apic.o i8259.o ioapic.o i8254.o pv_event.o
diff --git a/hw/kvm/pv_event.c b/hw/kvm/pv_event.c
new file mode 100644
index 0000000..e14b030
--- /dev/null
+++ b/hw/kvm/pv_event.c
@@ -0,0 +1,197 @@
+/*
+ * QEMU KVM support, paravirtual event device
+ *
+ * Copyright Fujitsu, Corp. 2012
+ *
+ * Authors:
+ * Wen Congyang <[email protected]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_para.h>
+#include <asm/kvm_para.h>
+#include <qobject.h>
+#include <qjson.h>
+#include <monitor.h>
+#include <sysemu.h>
+#include <kvm.h>
+
+/* Possible values for action parameter. */
+#define PANICKED_REPORT 1 /* emit QEVENT_GUEST_PANICKED only */
+#define PANICKED_PAUSE 2 /* emit QEVENT_GUEST_PANICKED and pause VM */
+#define PANICKED_POWEROFF 3 /* emit QEVENT_GUEST_PANICKED and quit VM */
+#define PANICKED_RESET 4 /* emit QEVENT_GUEST_PANICKED and reset VM */
+
+#define PV_EVENT_DRIVER "kvm_pv_event"
+
+struct PVEventAction {
+ char *panicked_action;
+ int panicked_action_value;
+};
+
+#define DEFINE_PV_EVENT_PROPERTIES(_state, _conf) \
+ DEFINE_PROP_STRING("panicked_action", _state, _conf.panicked_action)
+
+static void panicked_mon_event(const char *action)
+{
+ QObject *data;
+
+ data = qobject_from_jsonf("{ 'action': %s }", action);
+ monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
+ qobject_decref(data);
+}
+
+static void panicked_perform_action(uint32_t panicked_action)
+{
+ switch (panicked_action) {
+ case PANICKED_REPORT:
+ panicked_mon_event("report");
+ break;
+
+ case PANICKED_PAUSE:
+ panicked_mon_event("pause");
+ vm_stop(RUN_STATE_GUEST_PANICKED);
+ break;
+
+ case PANICKED_POWEROFF:
+ panicked_mon_event("poweroff");
+ qemu_system_shutdown_request();
+ break;
+
+ case PANICKED_RESET:
+ panicked_mon_event("reset");
+ qemu_system_reset_request();
+ break;
+ }
+}
+
+static uint64_t supported_event(void)
+{
+ return 1 << KVM_PV_FEATURE_PANICKED;
+}
+
+static void handle_event(int event, struct PVEventAction *conf)
+{
+ if (event == KVM_PV_EVENT_PANICKED) {
+ panicked_perform_action(conf->panicked_action_value);
+ }
+}
+
+static int pv_event_init(struct PVEventAction *conf)
+{
+ if (!conf->panicked_action) {
+ conf->panicked_action_value = PANICKED_REPORT;
+ } else if (strcasecmp(conf->panicked_action, "none") == 0) {
+ conf->panicked_action_value = PANICKED_REPORT;
+ } else if (strcasecmp(conf->panicked_action, "pause") == 0) {
+ conf->panicked_action_value = PANICKED_PAUSE;
+ } else if (strcasecmp(conf->panicked_action, "poweroff") == 0) {
+ conf->panicked_action_value = PANICKED_POWEROFF;
+ } else if (strcasecmp(conf->panicked_action, "reset") == 0) {
+ conf->panicked_action_value = PANICKED_RESET;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+#if defined(KVM_PV_EVENT_PORT)
+
+#include "hw/isa.h"
+
+typedef struct {
+ ISADevice dev;
+ struct PVEventAction conf;
+ MemoryRegion ioport;
+} PVIOPortState;
+
+static uint64_t pv_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+ return supported_event();
+}
+
+static void pv_io_write(void *opaque, target_phys_addr_t addr, uint64_t val,
+ unsigned size)
+{
+ PVIOPortState *s = opaque;
+
+ handle_event(val, &s->conf);
+}
+
+static const MemoryRegionOps pv_io_ops = {
+ .read = pv_io_read,
+ .write = pv_io_write,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static int pv_ioport_initfn(ISADevice *dev)
+{
+ PVIOPortState *s = DO_UPCAST(PVIOPortState, dev, dev);
+
+ if (pv_event_init(&s->conf) < 0) {
+ return -1;
+ }
+
+ memory_region_init_io(&s->ioport, &pv_io_ops, s, "pv_event", 1);
+ isa_register_ioport(dev, &s->ioport, KVM_PV_EVENT_PORT);
+
+ return 0;
+}
+
+static Property pv_ioport_properties[] = {
+ DEFINE_PV_EVENT_PROPERTIES(PVIOPortState, conf),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pv_ioport_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ISADeviceClass *ic = ISA_DEVICE_CLASS(klass);
+
+ ic->init = pv_ioport_initfn;
+ dc->no_user = 1;
+ dc->props = pv_ioport_properties;
+}
+
+static TypeInfo pv_ioport_info = {
+ .name = PV_EVENT_DRIVER,
+ .parent = TYPE_ISA_DEVICE,
+ .instance_size = sizeof(PVIOPortState),
+ .class_init = pv_ioport_class_init,
+};
+
+static void pv_ioport_register_types(void)
+{
+ type_register_static(&pv_ioport_info);
+}
+
+type_init(pv_ioport_register_types)
+
+void kvm_pv_event_init(void *opaque)
+{
+ ISABus *bus = opaque;
+ ISADevice *dev;
+
+ dev = isa_create(bus, PV_EVENT_DRIVER);
+ qdev_init_nofail(&dev->qdev);
+}
+
+#else
+
+/*
+ * If you need a qom device to handle pv event, this device should
+ * be created and initialized in kvm_pv_event_init().
+ *
+ * The parameter opaque is the device's parent bus.
+ */
+void kvm_pv_event_init(void *opaque)
+{
+}
+#endif
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 88ff041..37eca23 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -46,6 +46,7 @@
#ifdef CONFIG_XEN
# include <xen/hvm/hvm_info_table.h>
#endif
+#include <linux/kvm_para.h>
#define MAX_IDE_BUS 2
@@ -285,6 +286,10 @@ static void pc_init1(MemoryRegion *system_memory,
if (pci_enabled) {
pc_pci_device_init(pci_bus);
}
+
+ if (kvm_enabled()) {
+ kvm_pv_event_init(isa_bus);
+ }
}
static void pc_init_pci(ram_addr_t ram_size,
diff --git a/kvm-stub.c b/kvm-stub.c
index 94c9ea1..e8dc747 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -160,3 +160,7 @@ int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq)
{
return -ENOSYS;
}
+
+void kvm_pv_event_init(void *opaque)
+{
+}
diff --git a/kvm.h b/kvm.h
index 5b8f588..41ce1b2 100644
--- a/kvm.h
+++ b/kvm.h
@@ -276,4 +276,6 @@ int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq);
int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq);
int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq);
int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq);
+
+void kvm_pv_event_init(void *opaque);
#endif
--
1.7.1
Signed-off-by: Wen Congyang <[email protected]>
---
hw/pc_piix.c | 6 +++++-
qemu-config.c | 4 ++++
qemu-options.hx | 3 ++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 37eca23..10531a8 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -149,6 +149,8 @@ static void pc_init1(MemoryRegion *system_memory,
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
void *fw_cfg = NULL;
+ QemuOptsList *list = qemu_find_opts("machine");
+ bool enable_pv_event;
pc_cpus_init(cpu_model);
@@ -287,7 +289,9 @@ static void pc_init1(MemoryRegion *system_memory,
pc_pci_device_init(pci_bus);
}
- if (kvm_enabled()) {
+ enable_pv_event = qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
+ "enable_pv_event", false);
+ if (kvm_enabled() && enable_pv_event) {
kvm_pv_event_init(isa_bus);
}
}
diff --git a/qemu-config.c b/qemu-config.c
index c05ffbc..a58bf71 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -612,6 +612,10 @@ static QemuOptsList qemu_machine_opts = {
.name = "dump-guest-core",
.type = QEMU_OPT_BOOL,
.help = "Include guest memory in a core dump",
+ }, {
+ .name = "enable_pv_event",
+ .type = QEMU_OPT_BOOL,
+ .help = "handle pv event"
},
{ /* End of list */ }
},
diff --git a/qemu-options.hx b/qemu-options.hx
index 3c411c4..c825d66 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -38,7 +38,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" supported accelerators are kvm, xen, tcg (default: tcg)\n"
" kernel_irqchip=on|off controls accelerated irqchip support\n"
" kvm_shadow_mem=size of KVM shadow MMU\n"
- " dump-guest-core=on|off include guest memory in a core dump (default=on)\n",
+ " dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
+ " enable_pv_event=on|off controls pv event support\n",
QEMU_ARCH_ALL)
STEXI
@item -machine [type=]@var{name}[,prop=@var{value}[,...]]
--
1.7.1
On 08/29/2012 07:18 AM, Wen Congyang wrote:
> We can know the guest is panicked when the guest runs on xen.
> But we do not have such feature on kvm.
>
> Another purpose of this feature is: management app(for example:
> libvirt) can do auto dump when the guest is panicked. If management
> app does not do auto dump, the guest's user can do dump by hand if
> he sees the guest is panicked.
>
> We have three solutions to implement this feature:
> 1. use vmcall
> 2. use I/O port
> 3. use virtio-serial.
>
> We have decided to avoid touching hypervisor. The reason why I choose
> choose the I/O port is:
> 1. it is easier to implememt
> 2. it does not depend any virtual device
> 3. it can work when starting the kernel
>
> Signed-off-by: Wen Congyang <[email protected]>
> ---
> Documentation/virtual/kvm/pv_event.txt | 32 ++++++++++++++++++++++++++++++++
> arch/ia64/include/asm/kvm_para.h | 14 ++++++++++++++
> arch/powerpc/include/asm/kvm_para.h | 14 ++++++++++++++
> arch/s390/include/asm/kvm_para.h | 14 ++++++++++++++
> arch/x86/include/asm/kvm_para.h | 27 +++++++++++++++++++++++++++
> arch/x86/kernel/kvm.c | 25 +++++++++++++++++++++++++
> include/linux/kvm_para.h | 23 +++++++++++++++++++++++
> 7 files changed, 149 insertions(+), 0 deletions(-)
> create mode 100644 Documentation/virtual/kvm/pv_event.txt
>
> diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
> new file mode 100644
> index 0000000..bb04de0
> --- /dev/null
> +++ b/Documentation/virtual/kvm/pv_event.txt
> @@ -0,0 +1,32 @@
> +The KVM paravirtual event interface
> +=================================
> +
> +Initializing the paravirtual event interface
> +======================
> +kvm_pv_event_init()
> +Argiments:
> + None
> +
> +Return Value:
> + 0: The guest kernel can use paravirtual event interface.
> + 1: The guest kernel can't use paravirtual event interface.
> +
> +Querying whether the event can be ejected
> +======================
> +kvm_pv_has_feature()
> +Arguments:
> + feature: The bit value of this paravirtual event to query
> +
> +Return Value:
> + 0 : The guest kernel can't eject this paravirtual event.
> + -1: The guest kernel can eject this paravirtual event.
> +
> +
> +Ejecting paravirtual event
> +======================
> +kvm_pv_eject_event()
> +Arguments:
> + event: The event to be ejected.
> +
> +Return Value:
> + None
What's the protocol for communicating with the hypervisor? What is it supposed
to do on reads/writes to that ioport?
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 2f7712e..7d297f0 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
> #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> #define KVM_PV_EOI_DISABLED 0x0
>
> +#define KVM_PV_EVENT_PORT (0x505UL)
> +
> #ifdef __KERNEL__
> #include <asm/processor.h>
> +#include <linux/ioport.h>
>
> extern void kvmclock_init(void);
> extern int kvm_register_clock(char *txt);
> @@ -228,6 +231,30 @@ static inline void kvm_disable_steal_time(void)
> }
> #endif
>
> +static inline int kvm_arch_pv_event_init(void)
> +{
> + if (!request_region(KVM_PV_EVENT_PORT, 1, "KVM_PV_EVENT"))
Only one byte is requested here, but the rest of the code is reading/writing longs?
The struct resource * returned from request_region is simply being leaked here?
What happens if we go ahead with adding another event (let's say OOM event)?
request_region() is going to fail for anything but the first call.
> + return -1;
This return value doesn't correspond with the documentation.
> +
> + return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> + unsigned int features = inl(KVM_PV_EVENT_PORT);
> +
> + /* Reading from an invalid I/O port will return -1 */
Just wondering, where is that documented? For lkvm for example the return value
from an ioport without a device on the other side is undefined, so it's possible
we're doing something wrong there.
> + if (features == ~0)
> + features = 0;
> +
> + return features;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> + outl(event, KVM_PV_EVENT_PORT);
> +}
> +
> #endif /* __KERNEL__ */
>
> #endif /* _ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index c1d61ee..6129459 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -368,6 +368,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
> .notifier_call = kvm_pv_reboot_notify,
> };
>
> +static int
> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> +{
> + kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block kvm_pv_panic_nb = {
> + .notifier_call = kvm_pv_panic_notify,
> +};
> +
> static u64 kvm_steal_clock(int cpu)
> {
> u64 steal;
> @@ -447,6 +458,20 @@ static void __init kvm_apf_trap_init(void)
> set_intr_gate(14, &async_page_fault);
> }
>
> +static void __init kvm_pv_panicked_event_init(void)
> +{
> + if (!kvm_para_available())
> + return;
> +
> + if (kvm_pv_event_init())
> + return;
> +
> + if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
> + atomic_notifier_chain_register(&panic_notifier_list,
> + &kvm_pv_panic_nb);
> +}
> +arch_initcall(kvm_pv_panicked_event_init);
So it starts automatically on boot? Is there a way to disable it?
Thanks,
Sasha
At 08/29/2012 07:56 PM, Sasha Levin Wrote:
> On 08/29/2012 07:18 AM, Wen Congyang wrote:
>> We can know the guest is panicked when the guest runs on xen.
>> But we do not have such feature on kvm.
>>
>> Another purpose of this feature is: management app(for example:
>> libvirt) can do auto dump when the guest is panicked. If management
>> app does not do auto dump, the guest's user can do dump by hand if
>> he sees the guest is panicked.
>>
>> We have three solutions to implement this feature:
>> 1. use vmcall
>> 2. use I/O port
>> 3. use virtio-serial.
>>
>> We have decided to avoid touching hypervisor. The reason why I choose
>> choose the I/O port is:
>> 1. it is easier to implememt
>> 2. it does not depend any virtual device
>> 3. it can work when starting the kernel
>>
>> Signed-off-by: Wen Congyang <[email protected]>
>> ---
>> Documentation/virtual/kvm/pv_event.txt | 32 ++++++++++++++++++++++++++++++++
>> arch/ia64/include/asm/kvm_para.h | 14 ++++++++++++++
>> arch/powerpc/include/asm/kvm_para.h | 14 ++++++++++++++
>> arch/s390/include/asm/kvm_para.h | 14 ++++++++++++++
>> arch/x86/include/asm/kvm_para.h | 27 +++++++++++++++++++++++++++
>> arch/x86/kernel/kvm.c | 25 +++++++++++++++++++++++++
>> include/linux/kvm_para.h | 23 +++++++++++++++++++++++
>> 7 files changed, 149 insertions(+), 0 deletions(-)
>> create mode 100644 Documentation/virtual/kvm/pv_event.txt
>>
>> diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
>> new file mode 100644
>> index 0000000..bb04de0
>> --- /dev/null
>> +++ b/Documentation/virtual/kvm/pv_event.txt
>> @@ -0,0 +1,32 @@
>> +The KVM paravirtual event interface
>> +=================================
>> +
>> +Initializing the paravirtual event interface
>> +======================
>> +kvm_pv_event_init()
>> +Argiments:
>> + None
>> +
>> +Return Value:
>> + 0: The guest kernel can use paravirtual event interface.
>> + 1: The guest kernel can't use paravirtual event interface.
>> +
>> +Querying whether the event can be ejected
>> +======================
>> +kvm_pv_has_feature()
>> +Arguments:
>> + feature: The bit value of this paravirtual event to query
>> +
>> +Return Value:
>> + 0 : The guest kernel can't eject this paravirtual event.
>> + -1: The guest kernel can eject this paravirtual event.
>> +
>> +
>> +Ejecting paravirtual event
>> +======================
>> +kvm_pv_eject_event()
>> +Arguments:
>> + event: The event to be ejected.
>> +
>> +Return Value:
>> + None
>
> What's the protocol for communicating with the hypervisor? What is it supposed
> to do on reads/writes to that ioport?
It is not only about the ioport; other architectures can use other
mechanisms. We can use these APIs to eject an event to the hypervisor.
The caller does not need to care how the communication with the
hypervisor is done.
>
>> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
>> index 2f7712e..7d297f0 100644
>> --- a/arch/x86/include/asm/kvm_para.h
>> +++ b/arch/x86/include/asm/kvm_para.h
>> @@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
>> #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>> #define KVM_PV_EOI_DISABLED 0x0
>>
>> +#define KVM_PV_EVENT_PORT (0x505UL)
>> +
>> #ifdef __KERNEL__
>> #include <asm/processor.h>
>> +#include <linux/ioport.h>
>>
>> extern void kvmclock_init(void);
>> extern int kvm_register_clock(char *txt);
>> @@ -228,6 +231,30 @@ static inline void kvm_disable_steal_time(void)
>> }
>> #endif
>>
>> +static inline int kvm_arch_pv_event_init(void)
>> +{
>> + if (!request_region(KVM_PV_EVENT_PORT, 1, "KVM_PV_EVENT"))
>
> Only one byte is requested here, but the rest of the code is reading/writing longs?
>
> The struct resource * returned from request_region is simply being leaked here?
>
> What happens if we go ahead with adding another event (let's say OOM event)?
> request_region() is going to fail for anything but the first call.
For x86, we use an ioport to communicate with the hypervisor. We can read a
32-bit value from the hypervisor. If bit 0 is set, it means the hypervisor
supports the panicked event. If you want to add another event, you can use
another unused bit.
I think 32 events are enough for now.
You can write a value to the ioport to eject an event. Only one event can be
ejected at a time.
>
>> + return -1;
>
> This return value doesn't correspond with the documentation.
Yes, I will update the document. Thanks for pointing it out.
>
>> +
>> + return 0;
>> +}
>> +
>> +static inline unsigned int kvm_arch_pv_features(void)
>> +{
>> + unsigned int features = inl(KVM_PV_EVENT_PORT);
>> +
>> + /* Reading from an invalid I/O port will return -1 */
>
> Just wondering, where is that documented? For lkvm for example the return value
> from an ioport without a device on the other side is undefined, so it's possible
> we're doing something wrong there.
Hmm, how do I use lkvm? Can you give me an example, so I can test this patch on lkvm?
For qemu, it returns -1. I don't know yet which behavior is right. I will investigate it.
>
>> + if (features == ~0)
>> + features = 0;
>> +
>> + return features;
>> +}
>> +
>> +static inline void kvm_arch_pv_eject_event(unsigned int event)
>> +{
>> + outl(event, KVM_PV_EVENT_PORT);
>> +}
>> +
>> #endif /* __KERNEL__ */
>>
>> #endif /* _ASM_X86_KVM_PARA_H */
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index c1d61ee..6129459 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -368,6 +368,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
>> .notifier_call = kvm_pv_reboot_notify,
>> };
>>
>> +static int
>> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
>> +{
>> + kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
>> + return NOTIFY_DONE;
>> +}
>> +
>> +static struct notifier_block kvm_pv_panic_nb = {
>> + .notifier_call = kvm_pv_panic_notify,
>> +};
>> +
>> static u64 kvm_steal_clock(int cpu)
>> {
>> u64 steal;
>> @@ -447,6 +458,20 @@ static void __init kvm_apf_trap_init(void)
>> set_intr_gate(14, &async_page_fault);
>> }
>>
>> +static void __init kvm_pv_panicked_event_init(void)
>> +{
>> + if (!kvm_para_available())
>> + return;
>> +
>> + if (kvm_pv_event_init())
>> + return;
>> +
>> + if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
>> + atomic_notifier_chain_register(&panic_notifier_list,
>> + &kvm_pv_panic_nb);
>> +}
>> +arch_initcall(kvm_pv_panicked_event_init);
>
> So it starts automatically on boot? Is there a way to disable it?
Hmm, there is no way to disable it now.
Thanks
Wen Congyang
>
>
> Thanks,
> Sasha
>
On 08/30/2012 04:03 AM, Wen Congyang wrote:
> At 08/29/2012 07:56 PM, Sasha Levin Wrote:
>> On 08/29/2012 07:18 AM, Wen Congyang wrote:
>>> diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
>>> new file mode 100644
>>> index 0000000..bb04de0
>>> --- /dev/null
>>> +++ b/Documentation/virtual/kvm/pv_event.txt
>>> @@ -0,0 +1,32 @@
>>> +The KVM paravirtual event interface
>>> +=================================
>>> +
>>> +Initializing the paravirtual event interface
>>> +======================
>>> +kvm_pv_event_init()
>>> +Argiments:
>>> + None
>>> +
>>> +Return Value:
>>> + 0: The guest kernel can use paravirtual event interface.
>>> + 1: The guest kernel can't use paravirtual event interface.
>>> +
>>> +Querying whether the event can be ejected
>>> +======================
>>> +kvm_pv_has_feature()
>>> +Arguments:
>>> + feature: The bit value of this paravirtual event to query
>>> +
>>> +Return Value:
>>> + 0 : The guest kernel can't eject this paravirtual event.
>>> + -1: The guest kernel can eject this paravirtual event.
>>> +
>>> +
>>> +Ejecting paravirtual event
>>> +======================
>>> +kvm_pv_eject_event()
>>> +Arguments:
>>> + event: The event to be ejected.
>>> +
>>> +Return Value:
>>> + None
>>
>> What's the protocol for communicating with the hypervisor? What is it supposed
>> to do on reads/writes to that ioport?
>
> Not only ioport, the other arch can use some other ways. We can use
> these APIs to eject event to hypervisor. The caller does not care how
> to communicate with the hypervisor.
Right, it can work in several ways, but the protocol (whatever it may be)
between the hypervisor and the guest kernel should be documented here as well.
>>> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
>>> index 2f7712e..7d297f0 100644
>>> --- a/arch/x86/include/asm/kvm_para.h
>>> +++ b/arch/x86/include/asm/kvm_para.h
>>> @@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
>>> #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>>> #define KVM_PV_EOI_DISABLED 0x0
>>>
>>> +#define KVM_PV_EVENT_PORT (0x505UL)
>>> +
>>> #ifdef __KERNEL__
>>> #include <asm/processor.h>
>>> +#include <linux/ioport.h>
>>>
>>> extern void kvmclock_init(void);
>>> extern int kvm_register_clock(char *txt);
>>> @@ -228,6 +231,30 @@ static inline void kvm_disable_steal_time(void)
>>> }
>>> #endif
>>>
>>> +static inline int kvm_arch_pv_event_init(void)
>>> +{
>>> + if (!request_region(KVM_PV_EVENT_PORT, 1, "KVM_PV_EVENT"))
>>
>> Only one byte is requested here, but the rest of the code is reading/writing longs?
>>
>> The struct resource * returned from request_region is simply being leaked here?
>>
>> What happens if we go ahead with adding another event (let's say OOM event)?
>> request_region() is going to fail for anything but the first call.
>
> For x86, we use ioport to communicate with hypervisor. We can read a 32bit value
> from the hypervisor. If the bit0 is setted, it means the hypervisor supports
> panicked event. If you want add another event, you can use another unused bit.
> I think 32 events are enough now.
>
> You can write a value to the ioport to eject the event. Only one event can be
> ejected at a time.
I was trying to point out that kvm_pv_event_init() would fail on anything but
the first call, while the API suggests it should be called to verify we can
write events.
>>> +static inline unsigned int kvm_arch_pv_features(void)
>>> +{
>>> + unsigned int features = inl(KVM_PV_EVENT_PORT);
>>> +
>>> + /* Reading from an invalid I/O port will return -1 */
>>
>> Just wondering, where is that documented? For lkvm for example the return value
>> from an ioport without a device on the other side is undefined, so it's possible
>> we're doing something wrong there.
>
> Hmm, how to use lkvm? Can you give me a example. So I can test this patch on lkvm.
You can grab lkvm from https://github.com/penberg/linux-kvm , it lives under
tools/kvm/ in the kernel tree.
> For qemu, it returns -1. I don't know which is right now. I will investigate it.
Thing is, unless the x86 architecture specifies that it should return something
specific in this case, we can't assume a return value from either lkvm or qemu.
Thanks,
Sasha
On Wed, 29 Aug 2012 13:18:54 +0800
Wen Congyang <[email protected]> wrote:
> We can know the guest is panicked when the guest runs on xen.
> But we do not have such feature on kvm.
What's the status of this series?
It got lost in my queue and I ended up not reviewing it, but it seems
to be stuck.
>
> Another purpose of this feature is: management app(for example:
> libvirt) can do auto dump when the guest is panicked. If management
> app does not do auto dump, the guest's user can do dump by hand if
> he sees the guest is panicked.
>
> We have three solutions to implement this feature:
> 1. use vmcall
> 2. use I/O port
> 3. use virtio-serial.
>
> We have decided to avoid touching hypervisor. The reason why I choose
> choose the I/O port is:
> 1. it is easier to implememt
> 2. it does not depend any virtual device
> 3. it can work when starting the kernel
>
> Signed-off-by: Wen Congyang <[email protected]>
> ---
> Documentation/virtual/kvm/pv_event.txt | 32 ++++++++++++++++++++++++++++++++
> arch/ia64/include/asm/kvm_para.h | 14 ++++++++++++++
> arch/powerpc/include/asm/kvm_para.h | 14 ++++++++++++++
> arch/s390/include/asm/kvm_para.h | 14 ++++++++++++++
> arch/x86/include/asm/kvm_para.h | 27 +++++++++++++++++++++++++++
> arch/x86/kernel/kvm.c | 25 +++++++++++++++++++++++++
> include/linux/kvm_para.h | 23 +++++++++++++++++++++++
> 7 files changed, 149 insertions(+), 0 deletions(-)
> create mode 100644 Documentation/virtual/kvm/pv_event.txt
>
> diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
> new file mode 100644
> index 0000000..bb04de0
> --- /dev/null
> +++ b/Documentation/virtual/kvm/pv_event.txt
> @@ -0,0 +1,32 @@
> +The KVM paravirtual event interface
> +=================================
> +
> +Initializing the paravirtual event interface
> +======================
> +kvm_pv_event_init()
> +Argiments:
> + None
> +
> +Return Value:
> + 0: The guest kernel can use paravirtual event interface.
> + 1: The guest kernel can't use paravirtual event interface.
> +
> +Querying whether the event can be ejected
> +======================
> +kvm_pv_has_feature()
> +Arguments:
> + feature: The bit value of this paravirtual event to query
> +
> +Return Value:
> + 0 : The guest kernel can't eject this paravirtual event.
> + -1: The guest kernel can eject this paravirtual event.
> +
> +
> +Ejecting paravirtual event
> +======================
> +kvm_pv_eject_event()
> +Arguments:
> + event: The event to be ejected.
> +
> +Return Value:
> + None
> diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
> index 2019cb9..b5ec658 100644
> --- a/arch/ia64/include/asm/kvm_para.h
> +++ b/arch/ia64/include/asm/kvm_para.h
> @@ -31,6 +31,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> return false;
> }
>
> +static inline int kvm_arch_pv_event_init(void)
> +{
> + return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> + return 0;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +}
> +
> #endif
>
> #endif
> diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
> index c18916b..01b98c7 100644
> --- a/arch/powerpc/include/asm/kvm_para.h
> +++ b/arch/powerpc/include/asm/kvm_para.h
> @@ -211,6 +211,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> return false;
> }
>
> +static inline int kvm_arch_pv_event_init(void)
> +{
> + return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> + return 0;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +}
> +
> #endif /* __KERNEL__ */
>
> #endif /* __POWERPC_KVM_PARA_H__ */
> diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
> index da44867..00ce058 100644
> --- a/arch/s390/include/asm/kvm_para.h
> +++ b/arch/s390/include/asm/kvm_para.h
> @@ -154,6 +154,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> return false;
> }
>
> +static inline int kvm_arch_pv_event_init(void)
> +{
> + return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> + return 0;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> +}
> +
> #endif
>
> #endif /* __S390_KVM_PARA_H */
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 2f7712e..7d297f0 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
> #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> #define KVM_PV_EOI_DISABLED 0x0
>
> +#define KVM_PV_EVENT_PORT (0x505UL)
> +
> #ifdef __KERNEL__
> #include <asm/processor.h>
> +#include <linux/ioport.h>
>
> extern void kvmclock_init(void);
> extern int kvm_register_clock(char *txt);
> @@ -228,6 +231,30 @@ static inline void kvm_disable_steal_time(void)
> }
> #endif
>
> +static inline int kvm_arch_pv_event_init(void)
> +{
> + if (!request_region(KVM_PV_EVENT_PORT, 1, "KVM_PV_EVENT"))
> + return -1;
> +
> + return 0;
> +}
> +
> +static inline unsigned int kvm_arch_pv_features(void)
> +{
> + unsigned int features = inl(KVM_PV_EVENT_PORT);
> +
> + /* Reading from an invalid I/O port will return -1 */
> + if (features == ~0)
> + features = 0;
> +
> + return features;
> +}
> +
> +static inline void kvm_arch_pv_eject_event(unsigned int event)
> +{
> + outl(event, KVM_PV_EVENT_PORT);
> +}
> +
> #endif /* __KERNEL__ */
>
> #endif /* _ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index c1d61ee..6129459 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -368,6 +368,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
> .notifier_call = kvm_pv_reboot_notify,
> };
>
> +static int
> +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> +{
> + kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block kvm_pv_panic_nb = {
> + .notifier_call = kvm_pv_panic_notify,
> +};
> +
> static u64 kvm_steal_clock(int cpu)
> {
> u64 steal;
> @@ -447,6 +458,20 @@ static void __init kvm_apf_trap_init(void)
> set_intr_gate(14, &async_page_fault);
> }
>
> +static void __init kvm_pv_panicked_event_init(void)
> +{
> + if (!kvm_para_available())
> + return;
> +
> + if (kvm_pv_event_init())
> + return;
> +
> + if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
> + atomic_notifier_chain_register(&panic_notifier_list,
> + &kvm_pv_panic_nb);
> +}
> +arch_initcall(kvm_pv_panicked_event_init);
> +
> void __init kvm_guest_init(void)
> {
> int i;
> diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
> index ff476dd..8e0fb81 100644
> --- a/include/linux/kvm_para.h
> +++ b/include/linux/kvm_para.h
> @@ -20,6 +20,12 @@
> #define KVM_HC_FEATURES 3
> #define KVM_HC_PPC_MAP_MAGIC_PAGE 4
>
> +/* The bit of supported pv event */
> +#define KVM_PV_FEATURE_PANICKED 0
> +
> +/* The pv event value */
> +#define KVM_PV_EVENT_PANICKED 1
> +
> /*
> * hypercalls use architecture specific
> */
> @@ -33,5 +39,22 @@ static inline int kvm_para_has_feature(unsigned int feature)
> return 1;
> return 0;
> }
> +
> +static inline int kvm_pv_event_init(void)
> +{
> + return kvm_arch_pv_event_init();
> +}
> +
> +static inline int kvm_pv_has_feature(unsigned int feature)
> +{
> + if (kvm_arch_pv_features() & (1UL << feature))
> + return 1;
> + return 0;
> +}
> +
> +static inline void kvm_pv_eject_event(unsigned int event)
> +{
> + kvm_arch_pv_eject_event(event);
> +}
> #endif /* __KERNEL__ */
> #endif /* __LINUX_KVM_PARA_H */
On Mon, Oct 01, 2012 at 03:57:40PM -0300, Luiz Capitulino wrote:
> On Wed, 29 Aug 2012 13:18:54 +0800
> Wen Congyang <[email protected]> wrote:
>
> > We can know the guest is panicked when the guest runs on xen.
> > But we do not have such feature on kvm.
>
> What's the status of this series?
>
> It got lost in my queue and I ended up not reviewing it, but it seems
> to be stuck.
>
> >
> > Another purpose of this feature is: management app(for example:
> > libvirt) can do auto dump when the guest is panicked. If management
> > app does not do auto dump, the guest's user can do dump by hand if
> > he sees the guest is panicked.
> >
> > We have three solutions to implement this feature:
> > 1. use vmcall
> > 2. use I/O port
> > 3. use virtio-serial.
> >
> > We have decided to avoid touching hypervisor. The reason why I choose
> > choose the I/O port is:
> > 1. it is easier to implememt
> > 2. it does not depend any virtual device
> > 3. it can work when starting the kernel
Have you tried to move it early enough to minimize the number
of calls to panic()? Say, how many panics can occur between
first Linux instruction and initialization of netconsole?
Because this interface (all the way to QEMU) is going to be
reimplemented with different code later, in case other architectures
want a similar interface, which makes maintenance more difficult.
Using PCI would basically be replacing the I/O port write with a PCI write
(or a virtio write).
Avi?
> > Signed-off-by: Wen Congyang <[email protected]>
> > ---
> > Documentation/virtual/kvm/pv_event.txt | 32 ++++++++++++++++++++++++++++++++
> > arch/ia64/include/asm/kvm_para.h | 14 ++++++++++++++
> > arch/powerpc/include/asm/kvm_para.h | 14 ++++++++++++++
> > arch/s390/include/asm/kvm_para.h | 14 ++++++++++++++
> > arch/x86/include/asm/kvm_para.h | 27 +++++++++++++++++++++++++++
> > arch/x86/kernel/kvm.c | 25 +++++++++++++++++++++++++
> > include/linux/kvm_para.h | 23 +++++++++++++++++++++++
> > 7 files changed, 149 insertions(+), 0 deletions(-)
> > create mode 100644 Documentation/virtual/kvm/pv_event.txt
> >
> > diff --git a/Documentation/virtual/kvm/pv_event.txt b/Documentation/virtual/kvm/pv_event.txt
> > new file mode 100644
> > index 0000000..bb04de0
> > --- /dev/null
> > +++ b/Documentation/virtual/kvm/pv_event.txt
> > @@ -0,0 +1,32 @@
> > +The KVM paravirtual event interface
> > +=================================
> > +
> > +Initializing the paravirtual event interface
> > +======================
> > +kvm_pv_event_init()
> > +Argiments:
> > + None
> > +
> > +Return Value:
> > + 0: The guest kernel can use paravirtual event interface.
> > + 1: The guest kernel can't use paravirtual event interface.
> > +
> > +Querying whether the event can be ejected
> > +======================
> > +kvm_pv_has_feature()
> > +Arguments:
> > + feature: The bit value of this paravirtual event to query
> > +
> > +Return Value:
> > + 0 : The guest kernel can't eject this paravirtual event.
> > + -1: The guest kernel can eject this paravirtual event.
> > +
> > +
> > +Ejecting paravirtual event
> > +======================
> > +kvm_pv_eject_event()
> > +Arguments:
> > + event: The event to be ejected.
> > +
> > +Return Value:
> > + None
> > diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
> > index 2019cb9..b5ec658 100644
> > --- a/arch/ia64/include/asm/kvm_para.h
> > +++ b/arch/ia64/include/asm/kvm_para.h
> > @@ -31,6 +31,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> > return false;
> > }
> >
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +}
> > +
> > #endif
> >
> > #endif
> > diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
> > index c18916b..01b98c7 100644
> > --- a/arch/powerpc/include/asm/kvm_para.h
> > +++ b/arch/powerpc/include/asm/kvm_para.h
> > @@ -211,6 +211,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> > return false;
> > }
> >
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +}
> > +
> > #endif /* __KERNEL__ */
> >
> > #endif /* __POWERPC_KVM_PARA_H__ */
> > diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
> > index da44867..00ce058 100644
> > --- a/arch/s390/include/asm/kvm_para.h
> > +++ b/arch/s390/include/asm/kvm_para.h
> > @@ -154,6 +154,20 @@ static inline bool kvm_check_and_clear_guest_paused(void)
> > return false;
> > }
> >
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > +}
> > +
> > #endif
> >
> > #endif /* __S390_KVM_PARA_H */
> > diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> > index 2f7712e..7d297f0 100644
> > --- a/arch/x86/include/asm/kvm_para.h
> > +++ b/arch/x86/include/asm/kvm_para.h
> > @@ -96,8 +96,11 @@ struct kvm_vcpu_pv_apf_data {
> > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> > #define KVM_PV_EOI_DISABLED 0x0
> >
> > +#define KVM_PV_EVENT_PORT (0x505UL)
> > +
> > #ifdef __KERNEL__
> > #include <asm/processor.h>
> > +#include <linux/ioport.h>
> >
> > extern void kvmclock_init(void);
> > extern int kvm_register_clock(char *txt);
> > @@ -228,6 +231,30 @@ static inline void kvm_disable_steal_time(void)
> > }
> > #endif
> >
> > +static inline int kvm_arch_pv_event_init(void)
> > +{
> > + if (!request_region(KVM_PV_EVENT_PORT, 1, "KVM_PV_EVENT"))
> > + return -1;
> > +
> > + return 0;
> > +}
> > +
> > +static inline unsigned int kvm_arch_pv_features(void)
> > +{
> > + unsigned int features = inl(KVM_PV_EVENT_PORT);
> > +
> > + /* Reading from an invalid I/O port will return -1 */
> > + if (features == ~0)
> > + features = 0;
> > +
> > + return features;
> > +}
> > +
> > +static inline void kvm_arch_pv_eject_event(unsigned int event)
> > +{
> > + outl(event, KVM_PV_EVENT_PORT);
> > +}
> > +
> > #endif /* __KERNEL__ */
> >
> > #endif /* _ASM_X86_KVM_PARA_H */
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index c1d61ee..6129459 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -368,6 +368,17 @@ static struct notifier_block kvm_pv_reboot_nb = {
> > .notifier_call = kvm_pv_reboot_notify,
> > };
> >
> > +static int
> > +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused)
> > +{
> > + kvm_pv_eject_event(KVM_PV_EVENT_PANICKED);
> > + return NOTIFY_DONE;
> > +}
> > +
> > +static struct notifier_block kvm_pv_panic_nb = {
> > + .notifier_call = kvm_pv_panic_notify,
> > +};
> > +
> > static u64 kvm_steal_clock(int cpu)
> > {
> > u64 steal;
> > @@ -447,6 +458,20 @@ static void __init kvm_apf_trap_init(void)
> > set_intr_gate(14, &async_page_fault);
> > }
> >
> > +static void __init kvm_pv_panicked_event_init(void)
> > +{
> > + if (!kvm_para_available())
> > + return;
> > +
> > + if (kvm_pv_event_init())
> > + return;
> > +
> > + if (kvm_pv_has_feature(KVM_PV_FEATURE_PANICKED))
> > + atomic_notifier_chain_register(&panic_notifier_list,
> > + &kvm_pv_panic_nb);
> > +}
> > +arch_initcall(kvm_pv_panicked_event_init);
> > +
> > void __init kvm_guest_init(void)
> > {
> > int i;
> > diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
> > index ff476dd..8e0fb81 100644
> > --- a/include/linux/kvm_para.h
> > +++ b/include/linux/kvm_para.h
> > @@ -20,6 +20,12 @@
> > #define KVM_HC_FEATURES 3
> > #define KVM_HC_PPC_MAP_MAGIC_PAGE 4
> >
> > +/* The bit of supported pv event */
> > +#define KVM_PV_FEATURE_PANICKED 0
> > +
> > +/* The pv event value */
> > +#define KVM_PV_EVENT_PANICKED 1
> > +
> > /*
> > * hypercalls use architecture specific
> > */
> > @@ -33,5 +39,22 @@ static inline int kvm_para_has_feature(unsigned int feature)
> > return 1;
> > return 0;
> > }
> > +
> > +static inline int kvm_pv_event_init(void)
> > +{
> > + return kvm_arch_pv_event_init();
> > +}
> > +
> > +static inline int kvm_pv_has_feature(unsigned int feature)
> > +{
> > + if (kvm_arch_pv_features() & (1UL << feature))
> > + return 1;
> > + return 0;
> > +}
> > +
> > +static inline void kvm_pv_eject_event(unsigned int event)
> > +{
> > + kvm_arch_pv_eject_event(event);
> > +}
> > #endif /* __KERNEL__ */
> > #endif /* __LINUX_KVM_PARA_H */