2015-08-10 01:32:18

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 0/6] xen/PMU: PMU support for Xen PV(H) guests

Changes in v6:
* Fix ARM builds (as suggested by Julien):
o Make XEN_SYMS depend on X86 (patch 1)
o Add CONFIG_XEN_HAVE_PVMMU and use it in drivers/xen/sys-hypervisor.c
(patch 2)
* Adjust release dates in Documentation/ABI/testing/sysfs-hypervisor-pmu
(patch 2)


Boris Ostrovsky (6):
xen: xensyms support
xen/PMU: Sysfs interface for setting Xen PMU mode
xen/PMU: Initialization code for Xen PMU
xen/PMU: Describe vendor-specific PMU registers
xen/PMU: Intercept PMU-related MSR and APIC accesses
xen/PMU: PMU emulation code

Documentation/ABI/testing/sysfs-hypervisor-pmu | 23 +
arch/x86/include/asm/xen/hypercall.h | 6 +
arch/x86/include/asm/xen/interface.h | 123 ++++++
arch/x86/xen/Kconfig | 1 +
arch/x86/xen/Makefile | 2 +-
arch/x86/xen/apic.c | 6 +
arch/x86/xen/enlighten.c | 13 +-
arch/x86/xen/pmu.c | 572 +++++++++++++++++++++++++
arch/x86/xen/pmu.h | 15 +
arch/x86/xen/smp.c | 29 +-
arch/x86/xen/suspend.c | 23 +-
drivers/xen/Kconfig | 11 +
drivers/xen/sys-hypervisor.c | 136 +++++-
drivers/xen/xenfs/Makefile | 1 +
drivers/xen/xenfs/super.c | 3 +
drivers/xen/xenfs/xenfs.h | 1 +
drivers/xen/xenfs/xensyms.c | 152 +++++++
include/xen/interface/platform.h | 18 +
include/xen/interface/xen.h | 2 +
include/xen/interface/xenpmu.h | 94 ++++
20 files changed, 1220 insertions(+), 11 deletions(-)
create mode 100644 Documentation/ABI/testing/sysfs-hypervisor-pmu
create mode 100644 arch/x86/xen/pmu.c
create mode 100644 arch/x86/xen/pmu.h
create mode 100644 drivers/xen/xenfs/xensyms.c
create mode 100644 include/xen/interface/xenpmu.h

--
1.8.1.4


2015-08-10 01:33:09

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 1/6] xen: xensyms support

Export Xen symbols to dom0 via /proc/xen/xensyms (similar to
/proc/kallsyms).

Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
---
drivers/xen/Kconfig | 8 +++
drivers/xen/xenfs/Makefile | 1 +
drivers/xen/xenfs/super.c | 3 +
drivers/xen/xenfs/xenfs.h | 1 +
drivers/xen/xenfs/xensyms.c | 152 +++++++++++++++++++++++++++++++++++++++
include/xen/interface/platform.h | 18 +++++
6 files changed, 183 insertions(+)
create mode 100644 drivers/xen/xenfs/xensyms.c

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 7cd226d..9367604 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -280,4 +280,12 @@ config XEN_ACPI
def_bool y
depends on X86 && ACPI

+config XEN_SYMS
+ bool "Xen symbols"
+ depends on X86 && XEN_DOM0 && XENFS
+ default y if KALLSYMS
+ help
+ Exports hypervisor symbols (along with their types and addresses) via
+ /proc/xen/xensyms file, similar to /proc/kallsyms
+
endmenu
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index b019865..1a83010 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_XENFS) += xenfs.o

xenfs-y = super.o
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
+xenfs-$(CONFIG_XEN_SYMS) += xensyms.o
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 06092e0..8559a71 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -57,6 +57,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
{ "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
{ "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
+#ifdef CONFIG_XEN_SYMS
+ { "xensyms", &xensyms_ops, S_IRUSR},
+#endif
{""},
};

diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index 6b80c77..2c5934e 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -3,5 +3,6 @@

extern const struct file_operations xsd_kva_file_ops;
extern const struct file_operations xsd_port_file_ops;
+extern const struct file_operations xensyms_ops;

#endif /* _XENFS_XENBUS_H */
diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c
new file mode 100644
index 0000000..ed16f8d
--- /dev/null
+++ b/drivers/xen/xenfs/xensyms.c
@@ -0,0 +1,152 @@
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+#include <xen/xen-ops.h>
+#include "xenfs.h"
+
+
+#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */
+
+struct xensyms {
+ struct xen_platform_op op;
+ char *name;
+ uint32_t namelen;
+};
+
+/* Grab next output page from the hypervisor */
+static int xensyms_next_sym(struct xensyms *xs)
+{
+ int ret;
+ struct xenpf_symdata *symdata = &xs->op.u.symdata;
+ uint64_t symnum;
+
+ memset(xs->name, 0, xs->namelen);
+ symdata->namelen = xs->namelen;
+
+ symnum = symdata->symnum;
+
+ ret = HYPERVISOR_dom0_op(&xs->op);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * If hypervisor's symbol didn't fit into the buffer then allocate
+ * a larger buffer and try again
+ */
+ if (unlikely(symdata->namelen > xs->namelen)) {
+ kfree(xs->name);
+
+ xs->namelen = symdata->namelen;
+ xs->name = kzalloc(xs->namelen, GFP_KERNEL);
+ if (!xs->name)
+ return 1;
+
+ set_xen_guest_handle(symdata->name, xs->name);
+ symdata->symnum--; /* Rewind */
+
+ ret = HYPERVISOR_dom0_op(&xs->op);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (symdata->symnum == symnum)
+ /* End of symbols */
+ return 1;
+
+ return 0;
+}
+
+static void *xensyms_start(struct seq_file *m, loff_t *pos)
+{
+ struct xensyms *xs = (struct xensyms *)m->private;
+
+ xs->op.u.symdata.symnum = *pos;
+
+ if (xensyms_next_sym(xs))
+ return NULL;
+
+ return m->private;
+}
+
+static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct xensyms *xs = (struct xensyms *)m->private;
+
+ xs->op.u.symdata.symnum = ++(*pos);
+
+ if (xensyms_next_sym(xs))
+ return NULL;
+
+ return p;
+}
+
+static int xensyms_show(struct seq_file *m, void *p)
+{
+ struct xensyms *xs = (struct xensyms *)m->private;
+ struct xenpf_symdata *symdata = &xs->op.u.symdata;
+
+ seq_printf(m, "%016llx %c %s\n", symdata->address,
+ symdata->type, xs->name);
+
+ return 0;
+}
+
+static void xensyms_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations xensyms_seq_ops = {
+ .start = xensyms_start,
+ .next = xensyms_next,
+ .show = xensyms_show,
+ .stop = xensyms_stop,
+};
+
+static int xensyms_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *m;
+ struct xensyms *xs;
+ int ret;
+
+ ret = seq_open_private(file, &xensyms_seq_ops,
+ sizeof(struct xensyms));
+ if (ret)
+ return ret;
+
+ m = file->private_data;
+ xs = (struct xensyms *)m->private;
+
+ xs->namelen = XEN_KSYM_NAME_LEN + 1;
+ xs->name = kzalloc(xs->namelen, GFP_KERNEL);
+ if (!xs->name) {
+ seq_release_private(inode, file);
+ return -ENOMEM;
+ }
+ set_xen_guest_handle(xs->op.u.symdata.name, xs->name);
+ xs->op.cmd = XENPF_get_symbol;
+ xs->op.u.symdata.namelen = xs->namelen;
+
+ return 0;
+}
+
+static int xensyms_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct xensyms *xs = (struct xensyms *)m->private;
+
+ kfree(xs->name);
+ return seq_release_private(inode, file);
+}
+
+const struct file_operations xensyms_ops = {
+ .open = xensyms_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = xensyms_release
+};
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 5cc49ea..8e03587 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -474,6 +474,23 @@ struct xenpf_core_parking {
};
DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking);

+#define XENPF_get_symbol 63
+struct xenpf_symdata {
+ /* IN/OUT variables */
+ uint32_t namelen; /* size of 'name' buffer */
+
+ /* IN/OUT variables */
+ uint32_t symnum; /* IN: Symbol to read */
+ /* OUT: Next available symbol. If same as IN */
+ /* then we reached the end */
+
+ /* OUT variables */
+ GUEST_HANDLE(char) name;
+ uint64_t address;
+ char type;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
+
struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -495,6 +512,7 @@ struct xen_platform_op {
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
struct xenpf_core_parking core_parking;
+ struct xenpf_symdata symdata;
uint8_t pad[128];
} u;
};
--
1.8.1.4

2015-08-10 01:32:33

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode

Set Xen's PMU mode via /sys/hypervisor/pmu/pmu_mode. Add XENPMU hypercall.

Signed-off-by: Boris Ostrovsky <[email protected]>
---
Documentation/ABI/testing/sysfs-hypervisor-pmu | 23 +++++
arch/x86/include/asm/xen/hypercall.h | 6 ++
arch/x86/xen/Kconfig | 1 +
drivers/xen/Kconfig | 3 +
drivers/xen/sys-hypervisor.c | 136 ++++++++++++++++++++++++-
include/xen/interface/xen.h | 1 +
include/xen/interface/xenpmu.h | 59 +++++++++++
7 files changed, 228 insertions(+), 1 deletion(-)
create mode 100644 Documentation/ABI/testing/sysfs-hypervisor-pmu
create mode 100644 include/xen/interface/xenpmu.h

diff --git a/Documentation/ABI/testing/sysfs-hypervisor-pmu b/Documentation/ABI/testing/sysfs-hypervisor-pmu
new file mode 100644
index 0000000..224faa1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-hypervisor-pmu
@@ -0,0 +1,23 @@
+What: /sys/hypervisor/pmu/pmu_mode
+Date: August 2015
+KernelVersion: 4.3
+Contact: Boris Ostrovsky <[email protected]>
+Description:
+ Describes mode that Xen's performance-monitoring unit (PMU)
+ uses. Accepted values are
+ "off" -- PMU is disabled
+ "self" -- The guest can profile itself
+ "hv" -- The guest can profile itself and, if it is
+ privileged (e.g. dom0), the hypervisor
+ "all" -- The guest can profile itself, the hypervisor
+ and all other guests. Only available to
+ privileged guests.
+
+What: /sys/hypervisor/pmu/pmu_features
+Date: August 2015
+KernelVersion: 4.3
+Contact: Boris Ostrovsky <[email protected]>
+Description:
+ Describes Xen PMU features (as an integer). A set bit indicates
+ that the corresponding feature is enabled. See
+ include/xen/interface/xenpmu.h for available features
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index ca08a27..83aea80 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
return _hypercall1(int, tmem_op, op);
}

+static inline int
+HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
+{
+ return _hypercall2(int, xenpmu_op, op, arg);
+}
+
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e88fda8..049cdda 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@ config XEN
depends on PARAVIRT
select PARAVIRT_CLOCK
select XEN_HAVE_PVMMU
+ select XEN_HAVE_VPMU
depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_TSC
help
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 9367604..73708ac 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -288,4 +288,7 @@ config XEN_SYMS
Exports hypervisor symbols (along with their types and addresses) via
/proc/xen/xensyms file, similar to /proc/kallsyms

+config XEN_HAVE_VPMU
+ bool
+
endmenu
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index 96453f8..0907275 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -20,6 +20,9 @@
#include <xen/xenbus.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
+#ifdef CONFIG_XEN_HAVE_VPMU
+#include <xen/interface/xenpmu.h>
+#endif

#define HYPERVISOR_ATTR_RO(_name) \
static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
@@ -368,6 +371,126 @@ static void xen_properties_destroy(void)
sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
}

+#ifdef CONFIG_XEN_HAVE_VPMU
+struct pmu_mode {
+ const char *name;
+ uint32_t mode;
+};
+
+struct pmu_mode pmu_modes[] = {
+ {"off", XENPMU_MODE_OFF},
+ {"self", XENPMU_MODE_SELF},
+ {"hv", XENPMU_MODE_HV},
+ {"all", XENPMU_MODE_ALL}
+};
+
+static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
+ const char *buffer, size_t len)
+{
+ int ret;
+ struct xen_pmu_params xp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
+ if (strncmp(buffer, pmu_modes[i].name, len - 1) == 0) {
+ xp.val = pmu_modes[i].mode;
+ break;
+ }
+ }
+
+ if (i == ARRAY_SIZE(pmu_modes))
+ return -EINVAL;
+
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
+ if (ret)
+ return ret;
+
+ return len;
+}
+
+static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+ int ret;
+ struct xen_pmu_params xp;
+ int i;
+ uint32_t mode;
+
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp);
+ if (ret)
+ return ret;
+
+ mode = (uint32_t)xp.val;
+ for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
+ if (mode == pmu_modes[i].mode)
+ return sprintf(buffer, "%s\n", pmu_modes[i].name);
+ }
+
+ return -EINVAL;
+}
+HYPERVISOR_ATTR_RW(pmu_mode);
+
+static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr,
+ const char *buffer, size_t len)
+{
+ int ret;
+ uint32_t features;
+ struct xen_pmu_params xp;
+
+ ret = kstrtou32(buffer, 0, &features);
+ if (ret)
+ return ret;
+
+ xp.val = features;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp);
+ if (ret)
+ return ret;
+
+ return len;
+}
+
+static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+ int ret;
+ struct xen_pmu_params xp;
+
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp);
+ if (ret)
+ return ret;
+
+ return sprintf(buffer, "0x%x\n", (uint32_t)xp.val);
+}
+HYPERVISOR_ATTR_RW(pmu_features);
+
+static struct attribute *xen_pmu_attrs[] = {
+ &pmu_mode_attr.attr,
+ &pmu_features_attr.attr,
+ NULL
+};
+
+static const struct attribute_group xen_pmu_group = {
+ .name = "pmu",
+ .attrs = xen_pmu_attrs,
+};
+
+static int __init xen_pmu_init(void)
+{
+ return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
+}
+
+static void xen_pmu_destroy(void)
+{
+ sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
+}
+#endif
+
static int __init hyper_sysfs_init(void)
{
int ret;
@@ -390,7 +513,15 @@ static int __init hyper_sysfs_init(void)
ret = xen_properties_init();
if (ret)
goto prop_out;
-
+#ifdef CONFIG_XEN_HAVE_VPMU
+ if (xen_initial_domain()) {
+ ret = xen_pmu_init();
+ if (ret) {
+ xen_properties_destroy();
+ goto prop_out;
+ }
+ }
+#endif
goto out;

prop_out:
@@ -407,6 +538,9 @@ out:

static void __exit hyper_sysfs_exit(void)
{
+#ifdef CONFIG_XEN_HAVE_VPMU
+ xen_pmu_destroy();
+#endif
xen_properties_destroy();
xen_compilation_destroy();
xen_sysfs_uuid_destroy();
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a483789..c6399a1 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -80,6 +80,7 @@
#define __HYPERVISOR_kexec_op 37
#define __HYPERVISOR_tmem_op 38
#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
+#define __HYPERVISOR_xenpmu_op 40

/* Architecture-specific hypercall definitions. */
#define __HYPERVISOR_arch_0 48
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
new file mode 100644
index 0000000..eac1b49
--- /dev/null
+++ b/include/xen/interface/xenpmu.h
@@ -0,0 +1,59 @@
+#ifndef __XEN_PUBLIC_XENPMU_H__
+#define __XEN_PUBLIC_XENPMU_H__
+
+#include "xen.h"
+
+#define XENPMU_VER_MAJ 0
+#define XENPMU_VER_MIN 1
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args);
+ *
+ * @cmd == XENPMU_* (PMU operation)
+ * @args == struct xenpmu_params
+ */
+/* ` enum xenpmu_op { */
+#define XENPMU_mode_get 0 /* Also used for getting PMU version */
+#define XENPMU_mode_set 1
+#define XENPMU_feature_get 2
+#define XENPMU_feature_set 3
+#define XENPMU_init 4
+#define XENPMU_finish 5
+
+/* ` } */
+
+/* Parameters structure for HYPERVISOR_xenpmu_op call */
+struct xen_pmu_params {
+ /* IN/OUT parameters */
+ struct {
+ uint32_t maj;
+ uint32_t min;
+ } version;
+ uint64_t val;
+
+ /* IN parameters */
+ uint32_t vcpu;
+ uint32_t pad;
+};
+
+/* PMU modes:
+ * - XENPMU_MODE_OFF: No PMU virtualization
+ * - XENPMU_MODE_SELF: Guests can profile themselves
+ * - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles
+ * itself and Xen
+ * - XENPMU_MODE_ALL: Only dom0 has access to VPMU and it profiles
+ * everyone: itself, the hypervisor and the guests.
+ */
+#define XENPMU_MODE_OFF 0
+#define XENPMU_MODE_SELF (1<<0)
+#define XENPMU_MODE_HV (1<<1)
+#define XENPMU_MODE_ALL (1<<2)
+
+/*
+ * PMU features:
+ * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
+ */
+#define XENPMU_FEATURE_INTEL_BTS 1
+
+#endif /* __XEN_PUBLIC_XENPMU_H__ */
--
1.8.1.4

2015-08-10 01:33:03

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 3/6] xen/PMU: Initialization code for Xen PMU

Map shared data structure that will hold CPU registers, VPMU context,
V/PCPU IDs of the CPU interrupted by PMU interrupt. Hypervisor fills
this information in its handler and passes it to the guest for further
processing.

Set up PMU VIRQ.

Now that perf infrastructure will assume that PMU is available on a PV
guest we need to be careful and make sure that accesses via RDPMC
instruction don't cause fatal traps by the hypervisor. Provide a nop
RDPMC handler.

For the same reason avoid issuing a warning on a write to APIC's LVTPC.

Both of these will be made functional in later patches.

Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
---
arch/x86/include/asm/xen/interface.h | 123 +++++++++++++++++++++++++
arch/x86/xen/Makefile | 2 +-
arch/x86/xen/apic.c | 3 +
arch/x86/xen/enlighten.c | 12 ++-
arch/x86/xen/pmu.c | 172 +++++++++++++++++++++++++++++++++++
arch/x86/xen/pmu.h | 11 +++
arch/x86/xen/smp.c | 29 +++++-
arch/x86/xen/suspend.c | 23 +++--
include/xen/interface/xen.h | 1 +
include/xen/interface/xenpmu.h | 33 +++++++
10 files changed, 400 insertions(+), 9 deletions(-)
create mode 100644 arch/x86/xen/pmu.c
create mode 100644 arch/x86/xen/pmu.h

diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 3400dba..dced9bc 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -172,6 +172,129 @@ struct vcpu_guest_context {
#endif
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+ /*
+ * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+ * For PV(H) guests these fields are RO.
+ */
+ uint32_t counters;
+ uint32_t ctrls;
+
+ /* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint64_t regs[];
+#elif defined(__GNUC__)
+ uint64_t regs[0];
+#endif
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+ uint64_t counter;
+ uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+ /*
+ * Offsets to fixed and architectural counter MSRs (relative to
+ * xen_pmu_arch.c.intel).
+ * For PV(H) guests these fields are RO.
+ */
+ uint32_t fixed_counters;
+ uint32_t arch_counters;
+
+ /* PMU registers */
+ uint64_t global_ctrl;
+ uint64_t global_ovf_ctrl;
+ uint64_t global_status;
+ uint64_t fixed_ctrl;
+ uint64_t ds_area;
+ uint64_t pebs_enable;
+ uint64_t debugctl;
+
+ /* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint64_t regs[];
+#elif defined(__GNUC__)
+ uint64_t regs[0];
+#endif
+};
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+ uint64_t ip;
+ uint64_t sp;
+ uint64_t flags;
+ uint16_t cs;
+ uint16_t ss;
+ uint8_t cpl;
+ uint8_t pad[3];
+};
+
+/* PMU flags */
+#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+ union {
+ /*
+ * Processor's registers at the time of interrupt.
+ * WO for hypervisor, RO for guests.
+ */
+ struct xen_pmu_regs regs;
+ /*
+ * Padding for adding new registers to xen_pmu_regs in
+ * the future
+ */
+#define XENPMU_REGS_PAD_SZ 64
+ uint8_t pad[XENPMU_REGS_PAD_SZ];
+ } r;
+
+ /* WO for hypervisor, RO for guest */
+ uint64_t pmu_flags;
+
+ /*
+ * APIC LVTPC register.
+ * RW for both hypervisor and guest.
+ * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+ * during XENPMU_flush or XENPMU_lvtpc_set.
+ */
+ union {
+ uint32_t lapic_lvtpc;
+ uint64_t pad;
+ } l;
+
+ /*
+ * Vendor-specific PMU registers.
+ * RW for both hypervisor and guest (see exceptions above).
+ * Guest's updates to this field are verified and then loaded by the
+ * hypervisor into hardware during XENPMU_flush
+ */
+ union {
+ struct xen_pmu_amd_ctxt amd;
+ struct xen_pmu_intel_ctxt intel;
+
+ /*
+ * Padding for contexts (fixed parts only, does not include
+ * MSR banks that are specified by offsets)
+ */
+#define XENPMU_CTXT_PAD_SZ 128
+ uint8_t pad[XENPMU_CTXT_PAD_SZ];
+ } c;
+};
+
#endif /* !__ASSEMBLY__ */

/*
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7322755..071fab5 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o platform-pci-unplug.o \
- p2m.o
+ p2m.o pmu.o

obj-$(CONFIG_EVENT_TRACING) += trace.o

diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 70e060a..d03ebfa 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -72,6 +72,9 @@ static u32 xen_apic_read(u32 reg)

static void xen_apic_write(u32 reg, u32 val)
{
+ if (reg == APIC_LVTPC)
+ return;
+
/* Warn to see if there's any stray references */
WARN(1,"register: %x, value: %x\n", reg, val);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0b95c9b..cf46416 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -84,6 +84,7 @@
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
+#include "pmu.h"

EXPORT_SYMBOL_GPL(hypercall_page);

@@ -1042,6 +1043,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
return ret;
}

+unsigned long long xen_read_pmc(int counter)
+{
+ return 0;
+}
+
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1176,7 +1182,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_msr = xen_write_msr_safe,

.read_tsc = native_read_tsc,
- .read_pmc = native_read_pmc,
+ .read_pmc = xen_read_pmc,

.read_tscp = native_read_tscp,

@@ -1227,6 +1233,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ xen_pmu_finish(cpu);

if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
BUG();
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
new file mode 100644
index 0000000..ba7687c
--- /dev/null
+++ b/arch/x86/xen/pmu.c
@@ -0,0 +1,172 @@
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <asm/xen/hypercall.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+
+/* x86_pmu.handle_irq definition */
+#include "../kernel/cpu/perf_event.h"
+
+
+/* Shared page between hypervisor and domain */
+DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())
+
+/* perf callbacks*/
+int xen_is_in_guest(void)
+{
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
+ return 0;
+
+ return 1;
+}
+
+static int xen_is_user_mode(void)
+{
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
+ return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
+ else
+ return !!(xenpmu_data->pmu.r.regs.cpl & 3);
+}
+
+static unsigned long xen_get_guest_ip(void)
+{
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ return xenpmu_data->pmu.r.regs.ip;
+}
+
+static struct perf_guest_info_callbacks xen_guest_cbs = {
+ .is_in_guest = xen_is_in_guest,
+ .is_user_mode = xen_is_user_mode,
+ .get_guest_ip = xen_get_guest_ip,
+};
+
+/* Convert registers from Xen's format to Linux' */
+static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
+ struct pt_regs *regs, uint64_t pmu_flags)
+{
+ regs->ip = xen_regs->ip;
+ regs->cs = xen_regs->cs;
+ regs->sp = xen_regs->sp;
+
+ if (pmu_flags & PMU_SAMPLE_PV) {
+ if (pmu_flags & PMU_SAMPLE_USER)
+ regs->cs |= 3;
+ else
+ regs->cs &= ~3;
+ } else {
+ if (xen_regs->cpl)
+ regs->cs |= 3;
+ else
+ regs->cs &= ~3;
+ }
+}
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+{
+ int ret = IRQ_NONE;
+ struct pt_regs regs;
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return ret;
+ }
+
+ xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
+ xenpmu_data->pmu.pmu_flags);
+ if (x86_pmu.handle_irq(&regs))
+ ret = IRQ_HANDLED;
+
+ return ret;
+}
+
+bool is_xen_pmu(int cpu)
+{
+ return (per_cpu(xenpmu_shared, cpu) != NULL);
+}
+
+void xen_pmu_init(int cpu)
+{
+ int err;
+ struct xen_pmu_params xp;
+ unsigned long pfn;
+ struct xen_pmu_data *xenpmu_data;
+
+ BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+
+ if (xen_hvm_domain())
+ return;
+
+ xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+ if (!xenpmu_data) {
+ pr_err("VPMU init: No memory\n");
+ return;
+ }
+ pfn = virt_to_pfn(xenpmu_data);
+
+ xp.val = pfn_to_mfn(pfn);
+ xp.vcpu = cpu;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
+ if (err)
+ goto fail;
+
+ per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+
+ if (cpu == 0)
+ perf_register_guest_info_callbacks(&xen_guest_cbs);
+
+ return;
+
+fail:
+ pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
+ cpu, err);
+ free_pages((unsigned long)xenpmu_data, 0);
+ if (cpu != 0)
+ perf_unregister_guest_info_callbacks(&xen_guest_cbs);
+}
+
+void xen_pmu_finish(int cpu)
+{
+ struct xen_pmu_params xp;
+
+ if (xen_hvm_domain())
+ return;
+
+ xp.vcpu = cpu;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+
+ (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
+
+ free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
+ per_cpu(xenpmu_shared, cpu) = NULL;
+}
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
new file mode 100644
index 0000000..a76d2cf
--- /dev/null
+++ b/arch/x86/xen/pmu.h
@@ -0,0 +1,11 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+void xen_pmu_init(int cpu);
+void xen_pmu_finish(int cpu);
+bool is_xen_pmu(int cpu);
+
+#endif /* __XEN_PMU_H */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8648438..2a9ff73 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -26,6 +26,7 @@

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@@ -38,6 +39,7 @@
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
+#include "pmu.h"

cpumask_var_t xen_cpu_initialized_map;

@@ -50,6 +52,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -148,11 +151,18 @@ static void xen_smp_intr_free(unsigned int cpu)
kfree(per_cpu(xen_irq_work, cpu).name);
per_cpu(xen_irq_work, cpu).name = NULL;
}
+
+ if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+ per_cpu(xen_pmu_irq, cpu).irq = -1;
+ kfree(per_cpu(xen_pmu_irq, cpu).name);
+ per_cpu(xen_pmu_irq, cpu).name = NULL;
+ }
};
static int xen_smp_intr_init(unsigned int cpu)
{
int rc;
- char *resched_name, *callfunc_name, *debug_name;
+ char *resched_name, *callfunc_name, *debug_name, *pmu_name;

resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -218,6 +228,18 @@ static int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_irq_work, cpu).irq = rc;
per_cpu(xen_irq_work, cpu).name = callfunc_name;

+ if (is_xen_pmu(cpu)) {
+ pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+ rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+ xen_pmu_irq_handler,
+ IRQF_PERCPU|IRQF_NOBALANCING,
+ pmu_name, NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_pmu_irq, cpu).irq = rc;
+ per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+ }
+
return 0;

fail:
@@ -335,6 +357,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
}
set_cpu_sibling_map(0);

+ xen_pmu_init(0);
+
if (xen_smp_intr_init(0))
BUG();

@@ -462,6 +486,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
if (rc)
return rc;

+ xen_pmu_init(cpu);
+
rc = xen_smp_intr_init(cpu);
if (rc)
return rc;
@@ -503,6 +529,7 @@ static void xen_cpu_die(unsigned int cpu)
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
+ xen_pmu_finish(cpu);
}
}

diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 53b4c08..feddabd 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -11,6 +11,7 @@

#include "xen-ops.h"
#include "mmu.h"
+#include "pmu.h"

static void xen_pv_pre_suspend(void)
{
@@ -67,16 +68,26 @@ static void xen_pv_post_suspend(int suspend_cancelled)

void xen_arch_pre_suspend(void)
{
- if (xen_pv_domain())
- xen_pv_pre_suspend();
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ xen_pmu_finish(cpu);
+
+ if (xen_pv_domain())
+ xen_pv_pre_suspend();
}

void xen_arch_post_suspend(int cancelled)
{
- if (xen_pv_domain())
- xen_pv_post_suspend(cancelled);
- else
- xen_hvm_post_suspend(cancelled);
+ int cpu;
+
+ if (xen_pv_domain())
+ xen_pv_post_suspend(cancelled);
+ else
+ xen_hvm_post_suspend(cancelled);
+
+ for_each_online_cpu(cpu)
+ xen_pmu_init(cpu);
}

static void xen_vcpu_notify_restore(void *data)
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index c6399a1..d40fff1 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -113,6 +113,7 @@
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
+#define VIRQ_XENPMU 13 /* PMC interrupt */

/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index eac1b49..ca42301 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -56,4 +56,37 @@ struct xen_pmu_params {
*/
#define XENPMU_FEATURE_INTEL_BTS 1

+/*
+ * Shared PMU data between hypervisor and PV(H) domains.
+ *
+ * The hypervisor fills out this structure during PMU interrupt and sends an
+ * interrupt to appropriate VCPU.
+ * Architecture-independent fields of xen_pmu_data are WO for the hypervisor
+ * and RO for the guest but some fields in xen_pmu_arch can be writable
+ * by both the hypervisor and the guest (see arch-$arch/pmu.h).
+ */
+struct xen_pmu_data {
+ /* Interrupted VCPU */
+ uint32_t vcpu_id;
+
+ /*
+ * Physical processor on which the interrupt occurred. On non-privileged
+ * guests set to vcpu_id;
+ */
+ uint32_t pcpu_id;
+
+ /*
+ * Domain that was interrupted. On non-privileged guests set to
+ * DOMID_SELF.
+ * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in
+ * XENPMU_MODE_ALL mode, domain ID of another domain.
+ */
+ domid_t domain_id;
+
+ uint8_t pad[6];
+
+ /* Architecture-specific information */
+ struct xen_pmu_arch pmu;
+};
+
#endif /* __XEN_PUBLIC_XENPMU_H__ */
--
1.8.1.4

2015-08-10 01:32:14

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 4/6] xen/PMU: Describe vendor-specific PMU registers

AMD and Intel PMU register initialization and helpers that determine
whether a register belongs to PMU.

This and some of subsequent PMU emulation code is somewhat similar to
Xen's PMU implementation.

Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
---
arch/x86/xen/pmu.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 152 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index ba7687c..1fc7e10 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -18,6 +18,155 @@
DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())

+
+/* AMD PMU */
+#define F15H_NUM_COUNTERS 6
+#define F10H_NUM_COUNTERS 4
+
+static __read_mostly uint32_t amd_counters_base;
+static __read_mostly uint32_t amd_ctrls_base;
+static __read_mostly int amd_msr_step;
+static __read_mostly int k7_counters_mirrored;
+static __read_mostly int amd_num_counters;
+
+/* Intel PMU */
+#define MSR_TYPE_COUNTER 0
+#define MSR_TYPE_CTRL 1
+#define MSR_TYPE_GLOBAL 2
+#define MSR_TYPE_ARCH_COUNTER 3
+#define MSR_TYPE_ARCH_CTRL 4
+
+/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
+#define PMU_GENERAL_NR_SHIFT 8
+#define PMU_GENERAL_NR_BITS 8
+#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) \
+ << PMU_GENERAL_NR_SHIFT)
+
+/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
+#define PMU_FIXED_NR_SHIFT 0
+#define PMU_FIXED_NR_BITS 5
+#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) - 1) \
+ << PMU_FIXED_NR_SHIFT)
+
+/* Alias registers (0x4c1) for full-width writes to PMCs */
+#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))
+
+static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
+
+
+static void xen_pmu_arch_init(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ amd_num_counters = F15H_NUM_COUNTERS;
+ amd_counters_base = MSR_F15H_PERF_CTR;
+ amd_ctrls_base = MSR_F15H_PERF_CTL;
+ amd_msr_step = 2;
+ k7_counters_mirrored = 1;
+ break;
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x16:
+ default:
+ amd_num_counters = F10H_NUM_COUNTERS;
+ amd_counters_base = MSR_K7_PERFCTR0;
+ amd_ctrls_base = MSR_K7_EVNTSEL0;
+ amd_msr_step = 1;
+ k7_counters_mirrored = 0;
+ break;
+ }
+ } else {
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(0xa, &eax, &ebx, &ecx, &edx);
+
+ intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
+ PMU_GENERAL_NR_SHIFT;
+ intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
+ PMU_FIXED_NR_SHIFT;
+ }
+}
+
+static inline uint32_t get_fam15h_addr(u32 addr)
+{
+ switch (addr) {
+ case MSR_K7_PERFCTR0:
+ case MSR_K7_PERFCTR1:
+ case MSR_K7_PERFCTR2:
+ case MSR_K7_PERFCTR3:
+ return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
+ case MSR_K7_EVNTSEL0:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_EVNTSEL3:
+ return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
+ default:
+ break;
+ }
+
+ return addr;
+}
+
+static inline bool is_amd_pmu_msr(unsigned int msr)
+{
+ if ((msr >= MSR_F15H_PERF_CTL &&
+ msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
+ (msr >= MSR_K7_EVNTSEL0 &&
+ msr < MSR_K7_PERFCTR0 + amd_num_counters))
+ return true;
+
+ return false;
+}
+
+static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
+{
+ u32 msr_index_pmc;
+
+ switch (msr_index) {
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ case MSR_IA32_DS_AREA:
+ case MSR_IA32_PEBS_ENABLE:
+ *type = MSR_TYPE_CTRL;
+ return true;
+
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ *type = MSR_TYPE_GLOBAL;
+ return true;
+
+ default:
+
+ if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
+ (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
+ intel_num_fixed_counters)) {
+ *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
+ *type = MSR_TYPE_COUNTER;
+ return true;
+ }
+
+ if ((msr_index >= MSR_P6_EVNTSEL0) &&
+ (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
+ *index = msr_index - MSR_P6_EVNTSEL0;
+ *type = MSR_TYPE_ARCH_CTRL;
+ return true;
+ }
+
+ msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
+ if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
+ (msr_index_pmc < MSR_IA32_PERFCTR0 +
+ intel_num_arch_counters)) {
+ *type = MSR_TYPE_ARCH_COUNTER;
+ *index = msr_index_pmc - MSR_IA32_PERFCTR0;
+ return true;
+ }
+ return false;
+ }
+}
+
/* perf callbacks*/
int xen_is_in_guest(void)
{
@@ -141,8 +290,10 @@ void xen_pmu_init(int cpu)

per_cpu(xenpmu_shared, cpu) = xenpmu_data;

- if (cpu == 0)
+ if (cpu == 0) {
perf_register_guest_info_callbacks(&xen_guest_cbs);
+ xen_pmu_arch_init();
+ }

return;

--
1.8.1.4

2015-08-10 01:33:05

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 5/6] xen/PMU: Intercept PMU-related MSR and APIC accesses

Provide interfaces for recognizing accesses to PMU-related MSRs and
LVTPC APIC and process these accesses in Xen PMU code.

(The interrupt handler performs XENPMU_flush right away in the beginning
since no PMU emulation is available. It will be added with a later patch).

Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
---
arch/x86/xen/apic.c | 5 ++-
arch/x86/xen/enlighten.c | 11 +++--
arch/x86/xen/pmu.c | 95 +++++++++++++++++++++++++++++++++++++++++-
arch/x86/xen/pmu.h | 4 ++
include/xen/interface/xenpmu.h | 2 +
5 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index d03ebfa..acda713 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -7,6 +7,7 @@
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include "xen-ops.h"
+#include "pmu.h"
#include "smp.h"

static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
@@ -72,8 +73,10 @@ static u32 xen_apic_read(u32 reg)

static void xen_apic_write(u32 reg, u32 val)
{
- if (reg == APIC_LVTPC)
+ if (reg == APIC_LVTPC) {
+ (void)pmu_apic_update(reg);
return;
+ }

/* Warn to see if there's any stray references */
WARN(1,"register: %x, value: %x\n", reg, val);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index cf46416..623f8ad 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -991,6 +991,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
{
u64 val;

+ if (pmu_msr_read(msr, &val, err))
+ return val;
+
val = native_read_msr_safe(msr, err);
switch (msr) {
case MSR_IA32_APICBASE:
@@ -1037,17 +1040,13 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
Xen console noise. */

default:
- ret = native_write_msr_safe(msr, low, high);
+ if (!pmu_msr_write(msr, low, high, &ret))
+ ret = native_write_msr_safe(msr, low, high);
}

return ret;
}

-unsigned long long xen_read_pmc(int counter)
-{
- return 0;
-}
-
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 1fc7e10..69a0b68 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -51,6 +51,8 @@ static __read_mostly int amd_num_counters;
/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

+#define INTEL_PMC_TYPE_SHIFT 30
+
static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;


@@ -167,6 +169,91 @@ static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
}
}

+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
+{
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (is_amd_pmu_msr(msr)) {
+ *val = native_read_msr_safe(msr, err);
+ return true;
+ }
+ } else {
+ int type, index;
+
+ if (is_intel_pmu_msr(msr, &type, &index)) {
+ *val = native_read_msr_safe(msr, err);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (is_amd_pmu_msr(msr)) {
+ *err = native_write_msr_safe(msr, low, high);
+ return true;
+ }
+ } else {
+ int type, index;
+
+ if (is_intel_pmu_msr(msr, &type, &index)) {
+ *err = native_write_msr_safe(msr, low, high);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned long long xen_amd_read_pmc(int counter)
+{
+ uint32_t msr;
+ int err;
+
+ msr = amd_counters_base + (counter * amd_msr_step);
+ return native_read_msr_safe(msr, &err);
+}
+
+unsigned long long xen_intel_read_pmc(int counter)
+{
+ int err;
+ uint32_t msr;
+
+ if (counter & (1<<INTEL_PMC_TYPE_SHIFT))
+ msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
+ else
+ msr = MSR_IA32_PERFCTR0 + counter;
+
+ return native_read_msr_safe(msr, &err);
+}
+
+unsigned long long xen_read_pmc(int counter)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return xen_amd_read_pmc(counter);
+ else
+ return xen_intel_read_pmc(counter);
+}
+
+int pmu_apic_update(uint32_t val)
+{
+ int ret;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
+ return -EINVAL;
+ }
+
+ xenpmu_data->pmu.l.lapic_lvtpc = val;
+ ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);
+
+ return ret;
+}
+
/* perf callbacks*/
int xen_is_in_guest(void)
{
@@ -239,7 +326,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,

irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
- int ret = IRQ_NONE;
+ int err, ret = IRQ_NONE;
struct pt_regs regs;
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

@@ -248,6 +335,12 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
return ret;
}

+ err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
+ if (err) {
+ WARN_ONCE(1, "%s failed hypercall, err: %d\n", __func__, err);
+ return ret;
+ }
+
xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
xenpmu_data->pmu.pmu_flags);
if (x86_pmu.handle_irq(&regs))
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
index a76d2cf..af5f0ad 100644
--- a/arch/x86/xen/pmu.h
+++ b/arch/x86/xen/pmu.h
@@ -7,5 +7,9 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
void xen_pmu_init(int cpu);
void xen_pmu_finish(int cpu);
bool is_xen_pmu(int cpu);
+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
+bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
+int pmu_apic_update(uint32_t reg);
+unsigned long long xen_read_pmc(int counter);

#endif /* __XEN_PMU_H */
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index ca42301..139efc9 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -20,6 +20,8 @@
#define XENPMU_feature_set 3
#define XENPMU_init 4
#define XENPMU_finish 5
+#define XENPMU_lvtpc_set 6
+#define XENPMU_flush 7

/* ` } */

--
1.8.1.4

2015-08-10 01:32:36

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v6 6/6] xen/PMU: PMU emulation code

Add PMU emulation code that runs when we are processing a PMU interrupt.
This code will allow us not to trap to hypervisor on each MSR/LVTPC access
(of which there may be quite a few in the handler).

Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
---
arch/x86/xen/pmu.c | 214 +++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 185 insertions(+), 29 deletions(-)

diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 69a0b68..4c09167 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -13,11 +13,20 @@
/* x86_pmu.handle_irq definition */
#include "../kernel/cpu/perf_event.h"

+#define XENPMU_IRQ_PROCESSING 1
+struct xenpmu {
+ /* Shared page between hypervisor and domain */
+ struct xen_pmu_data *xenpmu_data;

-/* Shared page between hypervisor and domain */
-DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
-#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())
+ uint8_t flags;
+};
+DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
+#define get_xenpmu_data() (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
+#define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags)

+/* Macro for computing address of a PMU MSR bank */
+#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
+ (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS 6
@@ -169,19 +178,124 @@ static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
}
}

-bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
+static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
+ int index, bool is_read)
{
+ uint64_t *reg = NULL;
+ struct xen_pmu_intel_ctxt *ctxt;
+ uint64_t *fix_counters;
+ struct xen_pmu_cntr_pair *arch_cntr_pair;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
+

+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
+ return false;
+
+ ctxt = &xenpmu_data->pmu.c.intel;
+
+ switch (msr) {
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ reg = &ctxt->global_ovf_ctrl;
+ break;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ reg = &ctxt->global_status;
+ break;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ reg = &ctxt->global_ctrl;
+ break;
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ reg = &ctxt->fixed_ctrl;
+ break;
+ default:
+ switch (type) {
+ case MSR_TYPE_COUNTER:
+ fix_counters = field_offset(ctxt, fixed_counters);
+ reg = &fix_counters[index];
+ break;
+ case MSR_TYPE_ARCH_COUNTER:
+ arch_cntr_pair = field_offset(ctxt, arch_counters);
+ reg = &arch_cntr_pair[index].counter;
+ break;
+ case MSR_TYPE_ARCH_CTRL:
+ arch_cntr_pair = field_offset(ctxt, arch_counters);
+ reg = &arch_cntr_pair[index].control;
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (reg) {
+ if (is_read)
+ *val = *reg;
+ else {
+ *reg = *val;
+
+ if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
+ ctxt->global_status &= (~(*val));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
+{
+ uint64_t *reg = NULL;
+ int i, off = 0;
+ struct xen_pmu_amd_ctxt *ctxt;
+ uint64_t *counter_regs, *ctrl_regs;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
+
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
+ return false;
+
+ if (k7_counters_mirrored &&
+ ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
+ msr = get_fam15h_addr(msr);
+
+ ctxt = &xenpmu_data->pmu.c.amd;
+ for (i = 0; i < amd_num_counters; i++) {
+ if (msr == amd_ctrls_base + off) {
+ ctrl_regs = field_offset(ctxt, ctrls);
+ reg = &ctrl_regs[i];
+ break;
+ } else if (msr == amd_counters_base + off) {
+ counter_regs = field_offset(ctxt, counters);
+ reg = &counter_regs[i];
+ break;
+ }
+ off += amd_msr_step;
+ }
+
+ if (reg) {
+ if (is_read)
+ *val = *reg;
+ else
+ *reg = *val;
+
+ return true;
+ }
+ return false;
+}
+
+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
+{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
if (is_amd_pmu_msr(msr)) {
- *val = native_read_msr_safe(msr, err);
+ if (!xen_amd_pmu_emulate(msr, val, 1))
+ *val = native_read_msr_safe(msr, err);
return true;
}
} else {
int type, index;

if (is_intel_pmu_msr(msr, &type, &index)) {
- *val = native_read_msr_safe(msr, err);
+ if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
+ *val = native_read_msr_safe(msr, err);
return true;
}
}
@@ -191,16 +305,20 @@ bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
+ uint64_t val = ((uint64_t)high << 32) | low;
+
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
if (is_amd_pmu_msr(msr)) {
- *err = native_write_msr_safe(msr, low, high);
+ if (!xen_amd_pmu_emulate(msr, &val, 0))
+ *err = native_write_msr_safe(msr, low, high);
return true;
}
} else {
int type, index;

if (is_intel_pmu_msr(msr, &type, &index)) {
- *err = native_write_msr_safe(msr, low, high);
+ if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
+ *err = native_write_msr_safe(msr, low, high);
return true;
}
}
@@ -210,24 +328,52 @@ bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)

unsigned long long xen_amd_read_pmc(int counter)
{
- uint32_t msr;
- int err;
+ struct xen_pmu_amd_ctxt *ctxt;
+ uint64_t *counter_regs;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();
+
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
+ uint32_t msr;
+ int err;

- msr = amd_counters_base + (counter * amd_msr_step);
- return native_read_msr_safe(msr, &err);
+ msr = amd_counters_base + (counter * amd_msr_step);
+ return native_read_msr_safe(msr, &err);
+ }
+
+ ctxt = &xenpmu_data->pmu.c.amd;
+ counter_regs = field_offset(ctxt, counters);
+ return counter_regs[counter];
}

unsigned long long xen_intel_read_pmc(int counter)
{
- int err;
- uint32_t msr;
+ struct xen_pmu_intel_ctxt *ctxt;
+ uint64_t *fixed_counters;
+ struct xen_pmu_cntr_pair *arch_cntr_pair;
+ struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();

- if (counter & (1<<INTEL_PMC_TYPE_SHIFT))
- msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
- else
- msr = MSR_IA32_PERFCTR0 + counter;
+ if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
+ uint32_t msr;
+ int err;

- return native_read_msr_safe(msr, &err);
+ if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
+ msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
+ else
+ msr = MSR_IA32_PERFCTR0 + counter;
+
+ return native_read_msr_safe(msr, &err);
+ }
+
+ ctxt = &xenpmu_data->pmu.c.intel;
+ if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
+ fixed_counters = field_offset(ctxt, fixed_counters);
+ return fixed_counters[counter & 0xffff];
+ }
+
+ arch_cntr_pair = field_offset(ctxt, arch_counters);
+ return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
@@ -249,6 +395,10 @@ int pmu_apic_update(uint32_t val)
}

xenpmu_data->pmu.l.lapic_lvtpc = val;
+
+ if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
+ return 0;
+
ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

return ret;
@@ -329,29 +479,34 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
int err, ret = IRQ_NONE;
struct pt_regs regs;
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ uint8_t xenpmu_flags = get_xenpmu_flags();

if (!xenpmu_data) {
WARN_ONCE(1, "%s: pmudata not initialized\n", __func__);
return ret;
}

- err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
- if (err) {
- WARN_ONCE(1, "%s failed hypercall, err: %d\n", __func__, err);
- return ret;
- }
-
+ this_cpu_ptr(&xenpmu_shared)->flags =
+ xenpmu_flags | XENPMU_IRQ_PROCESSING;
xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
xenpmu_data->pmu.pmu_flags);
if (x86_pmu.handle_irq(&regs))
ret = IRQ_HANDLED;

+ /* Write out cached context to HW */
+ err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
+ this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
+ if (err) {
+ WARN_ONCE(1, "%s failed hypercall, err: %d\n", __func__, err);
+ return IRQ_NONE;
+ }
+
return ret;
}

bool is_xen_pmu(int cpu)
{
- return (per_cpu(xenpmu_shared, cpu) != NULL);
+ return (get_xenpmu_data() != NULL);
}

void xen_pmu_init(int cpu)
@@ -381,7 +536,8 @@ void xen_pmu_init(int cpu)
if (err)
goto fail;

- per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+ per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
+ per_cpu(xenpmu_shared, cpu).flags = 0;

if (cpu == 0) {
perf_register_guest_info_callbacks(&xen_guest_cbs);
@@ -411,6 +567,6 @@ void xen_pmu_finish(int cpu)

(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

- free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
- per_cpu(xenpmu_shared, cpu) = NULL;
+ free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
+ per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}
--
1.8.1.4

2015-08-10 10:03:19

by Stefano Stabellini

[permalink] [raw]
Subject: Re: [PATCH v6 0/6] xen/PMU: PMU support for Xen PV(H) guests

On Sun, 9 Aug 2015, Boris Ostrovsky wrote:
> Changes in v6:
> * Fix ARM builds (as suggested by Julien):
> o Make XEN_SYMS depend on X86 (patch 1)
> o Add CONFIG_XEN_HAVE_PVMMU and use it in drivers/xen/sys-hypervisor.c
> (patch 2)
> * Adjust release dates in Documentation/ABI/testing/sysfs-hypervisor-pmu
> (patch 2)

I confirm that it compiles just fine on ARM now.


> Boris Ostrovsky (6):
> xen: xensyms support
> xen/PMU: Sysfs interface for setting Xen PMU mode
> xen/PMU: Initialization code for Xen PMU
> xen/PMU: Describe vendor-specific PMU registers
> xen/PMU: Intercept PMU-related MSR and APIC accesses
> xen/PMU: PMU emulation code
>
> Documentation/ABI/testing/sysfs-hypervisor-pmu | 23 +
> arch/x86/include/asm/xen/hypercall.h | 6 +
> arch/x86/include/asm/xen/interface.h | 123 ++++++
> arch/x86/xen/Kconfig | 1 +
> arch/x86/xen/Makefile | 2 +-
> arch/x86/xen/apic.c | 6 +
> arch/x86/xen/enlighten.c | 13 +-
> arch/x86/xen/pmu.c | 572 +++++++++++++++++++++++++
> arch/x86/xen/pmu.h | 15 +
> arch/x86/xen/smp.c | 29 +-
> arch/x86/xen/suspend.c | 23 +-
> drivers/xen/Kconfig | 11 +
> drivers/xen/sys-hypervisor.c | 136 +++++-
> drivers/xen/xenfs/Makefile | 1 +
> drivers/xen/xenfs/super.c | 3 +
> drivers/xen/xenfs/xenfs.h | 1 +
> drivers/xen/xenfs/xensyms.c | 152 +++++++
> include/xen/interface/platform.h | 18 +
> include/xen/interface/xen.h | 2 +
> include/xen/interface/xenpmu.h | 94 ++++
> 20 files changed, 1220 insertions(+), 11 deletions(-)
> create mode 100644 Documentation/ABI/testing/sysfs-hypervisor-pmu
> create mode 100644 arch/x86/xen/pmu.c
> create mode 100644 arch/x86/xen/pmu.h
> create mode 100644 drivers/xen/xenfs/xensyms.c
> create mode 100644 include/xen/interface/xenpmu.h
>
> --
> 1.8.1.4
>

2015-08-10 14:11:11

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH v6 2/6] xen/PMU: Sysfs interface for setting Xen PMU mode

On Sun, Aug 09, 2015 at 09:31:43PM -0400, Boris Ostrovsky wrote:
> Set Xen's PMU mode via /sys/hypervisor/pmu/pmu_mode. Add XENPMU hypercall.
>
> Signed-off-by: Boris Ostrovsky <[email protected]>

Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> Documentation/ABI/testing/sysfs-hypervisor-pmu | 23 +++++
> arch/x86/include/asm/xen/hypercall.h | 6 ++
> arch/x86/xen/Kconfig | 1 +
> drivers/xen/Kconfig | 3 +
> drivers/xen/sys-hypervisor.c | 136 ++++++++++++++++++++++++-
> include/xen/interface/xen.h | 1 +
> include/xen/interface/xenpmu.h | 59 +++++++++++
> 7 files changed, 228 insertions(+), 1 deletion(-)
> create mode 100644 Documentation/ABI/testing/sysfs-hypervisor-pmu
> create mode 100644 include/xen/interface/xenpmu.h
>
> diff --git a/Documentation/ABI/testing/sysfs-hypervisor-pmu b/Documentation/ABI/testing/sysfs-hypervisor-pmu
> new file mode 100644
> index 0000000..224faa1
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-hypervisor-pmu
> @@ -0,0 +1,23 @@
> +What: /sys/hypervisor/pmu/pmu_mode
> +Date: August 2015
> +KernelVersion: 4.3
> +Contact: Boris Ostrovsky <[email protected]>
> +Description:
> + Describes mode that Xen's performance-monitoring unit (PMU)
> + uses. Accepted values are
> + "off" -- PMU is disabled
> + "self" -- The guest can profile itself
> + "hv" -- The guest can profile itself and, if it is
> + privileged (e.g. dom0), the hypervisor
> + "all" -- The guest can profile itself, the hypervisor
> + and all other guests. Only available to
> + privileged guests.
> +
> +What: /sys/hypervisor/pmu/pmu_features
> +Date: August 2015
> +KernelVersion: 4.3
> +Contact: Boris Ostrovsky <[email protected]>
> +Description:
> + Describes Xen PMU features (as an integer). A set bit indicates
> + that the corresponding feature is enabled. See
> + include/xen/interface/xenpmu.h for available features
> diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
> index ca08a27..83aea80 100644
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
> return _hypercall1(int, tmem_op, op);
> }
>
> +static inline int
> +HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
> +{
> + return _hypercall2(int, xenpmu_op, op, arg);
> +}
> +
> static inline void
> MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
> {
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index e88fda8..049cdda 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -7,6 +7,7 @@ config XEN
> depends on PARAVIRT
> select PARAVIRT_CLOCK
> select XEN_HAVE_PVMMU
> + select XEN_HAVE_VPMU
> depends on X86_64 || (X86_32 && X86_PAE)
> depends on X86_TSC
> help
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 9367604..73708ac 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -288,4 +288,7 @@ config XEN_SYMS
> Exports hypervisor symbols (along with their types and addresses) via
> /proc/xen/xensyms file, similar to /proc/kallsyms
>
> +config XEN_HAVE_VPMU
> + bool
> +
> endmenu
> diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
> index 96453f8..0907275 100644
> --- a/drivers/xen/sys-hypervisor.c
> +++ b/drivers/xen/sys-hypervisor.c
> @@ -20,6 +20,9 @@
> #include <xen/xenbus.h>
> #include <xen/interface/xen.h>
> #include <xen/interface/version.h>
> +#ifdef CONFIG_XEN_HAVE_VPMU
> +#include <xen/interface/xenpmu.h>
> +#endif
>
> #define HYPERVISOR_ATTR_RO(_name) \
> static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
> @@ -368,6 +371,126 @@ static void xen_properties_destroy(void)
> sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
> }
>
> +#ifdef CONFIG_XEN_HAVE_VPMU
> +struct pmu_mode {
> + const char *name;
> + uint32_t mode;
> +};
> +
> +struct pmu_mode pmu_modes[] = {
> + {"off", XENPMU_MODE_OFF},
> + {"self", XENPMU_MODE_SELF},
> + {"hv", XENPMU_MODE_HV},
> + {"all", XENPMU_MODE_ALL}
> +};
> +
> +static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (strncmp(buffer, pmu_modes[i].name, len - 1) == 0) {
> + xp.val = pmu_modes[i].mode;
> + break;
> + }
> + }
> +
> + if (i == ARRAY_SIZE(pmu_modes))
> + return -EINVAL;
> +
> + xp.version.maj = XENPMU_VER_MAJ;
> + xp.version.min = XENPMU_VER_MIN;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> +static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> + int i;
> + uint32_t mode;
> +
> + xp.version.maj = XENPMU_VER_MAJ;
> + xp.version.min = XENPMU_VER_MIN;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp);
> + if (ret)
> + return ret;
> +
> + mode = (uint32_t)xp.val;
> + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
> + if (mode == pmu_modes[i].mode)
> + return sprintf(buffer, "%s\n", pmu_modes[i].name);
> + }
> +
> + return -EINVAL;
> +}
> +HYPERVISOR_ATTR_RW(pmu_mode);
> +
> +static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr,
> + const char *buffer, size_t len)
> +{
> + int ret;
> + uint32_t features;
> + struct xen_pmu_params xp;
> +
> + ret = kstrtou32(buffer, 0, &features);
> + if (ret)
> + return ret;
> +
> + xp.val = features;
> + xp.version.maj = XENPMU_VER_MAJ;
> + xp.version.min = XENPMU_VER_MIN;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> +static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer)
> +{
> + int ret;
> + struct xen_pmu_params xp;
> +
> + xp.version.maj = XENPMU_VER_MAJ;
> + xp.version.min = XENPMU_VER_MIN;
> + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp);
> + if (ret)
> + return ret;
> +
> + return sprintf(buffer, "0x%x\n", (uint32_t)xp.val);
> +}
> +HYPERVISOR_ATTR_RW(pmu_features);
> +
> +static struct attribute *xen_pmu_attrs[] = {
> + &pmu_mode_attr.attr,
> + &pmu_features_attr.attr,
> + NULL
> +};
> +
> +static const struct attribute_group xen_pmu_group = {
> + .name = "pmu",
> + .attrs = xen_pmu_attrs,
> +};
> +
> +static int __init xen_pmu_init(void)
> +{
> + return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
> +}
> +
> +static void xen_pmu_destroy(void)
> +{
> + sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
> +}
> +#endif
> +
> static int __init hyper_sysfs_init(void)
> {
> int ret;
> @@ -390,7 +513,15 @@ static int __init hyper_sysfs_init(void)
> ret = xen_properties_init();
> if (ret)
> goto prop_out;
> -
> +#ifdef CONFIG_XEN_HAVE_VPMU
> + if (xen_initial_domain()) {
> + ret = xen_pmu_init();
> + if (ret) {
> + xen_properties_destroy();
> + goto prop_out;
> + }
> + }
> +#endif
> goto out;
>
> prop_out:
> @@ -407,6 +538,9 @@ out:
>
> static void __exit hyper_sysfs_exit(void)
> {
> +#ifdef CONFIG_XEN_HAVE_VPMU
> + xen_pmu_destroy();
> +#endif
> xen_properties_destroy();
> xen_compilation_destroy();
> xen_sysfs_uuid_destroy();
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index a483789..c6399a1 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -80,6 +80,7 @@
> #define __HYPERVISOR_kexec_op 37
> #define __HYPERVISOR_tmem_op 38
> #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
> +#define __HYPERVISOR_xenpmu_op 40
>
> /* Architecture-specific hypercall definitions. */
> #define __HYPERVISOR_arch_0 48
> diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
> new file mode 100644
> index 0000000..eac1b49
> --- /dev/null
> +++ b/include/xen/interface/xenpmu.h
> @@ -0,0 +1,59 @@
> +#ifndef __XEN_PUBLIC_XENPMU_H__
> +#define __XEN_PUBLIC_XENPMU_H__
> +
> +#include "xen.h"
> +
> +#define XENPMU_VER_MAJ 0
> +#define XENPMU_VER_MIN 1
> +
> +/*
> + * ` enum neg_errnoval
> + * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args);
> + *
> + * @cmd == XENPMU_* (PMU operation)
> + * @args == struct xenpmu_params
> + */
> +/* ` enum xenpmu_op { */
> +#define XENPMU_mode_get 0 /* Also used for getting PMU version */
> +#define XENPMU_mode_set 1
> +#define XENPMU_feature_get 2
> +#define XENPMU_feature_set 3
> +#define XENPMU_init 4
> +#define XENPMU_finish 5
> +
> +/* ` } */
> +
> +/* Parameters structure for HYPERVISOR_xenpmu_op call */
> +struct xen_pmu_params {
> + /* IN/OUT parameters */
> + struct {
> + uint32_t maj;
> + uint32_t min;
> + } version;
> + uint64_t val;
> +
> + /* IN parameters */
> + uint32_t vcpu;
> + uint32_t pad;
> +};
> +
> +/* PMU modes:
> + * - XENPMU_MODE_OFF: No PMU virtualization
> + * - XENPMU_MODE_SELF: Guests can profile themselves
> + * - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles
> + * itself and Xen
> + * - XENPMU_MODE_ALL: Only dom0 has access to VPMU and it profiles
> + * everyone: itself, the hypervisor and the guests.
> + */
> +#define XENPMU_MODE_OFF 0
> +#define XENPMU_MODE_SELF (1<<0)
> +#define XENPMU_MODE_HV (1<<1)
> +#define XENPMU_MODE_ALL (1<<2)
> +
> +/*
> + * PMU features:
> + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
> + */
> +#define XENPMU_FEATURE_INTEL_BTS 1
> +
> +#endif /* __XEN_PUBLIC_XENPMU_H__ */
> --
> 1.8.1.4
>