This patch contains the new x86_64 files.
The files are as follows:
arch/x86_64/perfmon/Kconfig:
- add menuconfig options
arch/x86_64/perfmon/Makefile:
- makefile for arch specific files
arch/x86_64/perfmon/perfmon_amd64.c:
- PMU description for AMD64 PMU for both 32 and 64-bit modes
arch/x86_64/perfmon/perfmon_em64t_pebs_smpl.c:
- sampling format for 64-bit PEBS support for EM64T processors
include/asm-x86_64/perfmon.h:
- architecture specific header definitions
include/asm-x86_64/perfmon_em64t_pebs_smpl.h:
- public header file for 64-bit PEBS sampling format for EM64T processors
--- linux-2.6.17.9.base/arch/x86_64/perfmon/Kconfig 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/arch/x86_64/perfmon/Kconfig 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,39 @@
+menu "Hardware Performance Monitoring support"
+config PERFMON
+ bool "Perfmon2 performance monitoring interface"
+ select X86_LOCAL_APIC
+ default y
+ help
+ Enables the perfmon2 interface to access the hardware
+ performance counters. See <http://perfmon2.sf.net/> for
+ more details. If you're unsure, say Y.
+
+config X86_64_PERFMON_AMD64
+ tristate "Support 64-bit mode AMD64 hardware performance counters"
+ depends on PERFMON
+ default m
+ help
+ Enables support for 64-bit mode AMD64 hardware performance
+ counters. Does not work with Intel EM64T processors.
+ If unsure, say m.
+
+config X86_64_PERFMON_EM64T
+ tristate "Support Intel EM64T hardware performance counters"
+ depends on PERFMON
+ default m
+ help
+ Enables support for the Intel EM64T hardware performance
+ counters. Does not work with AMD64 processors.
+ If unsure, say m.
+
+config X86_64_PERFMON_EM64T_PEBS
+ tristate "Support for Intel EM64T PEBS sampling format"
+ depends on X86_64_PERFMON_EM64T
+ default m
+ help
+ Enables support for Precise Event-Based Sampling (PEBS) on the Intel
+ EM64T processors which support it. Does not work with AMD X86-64
+ processors.
+ If unsure, say m.
+
+endmenu
--- linux-2.6.17.9.base/arch/x86_64/perfmon/Makefile 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/arch/x86_64/perfmon/Makefile 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,12 @@
+#
+# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+# Contributed by Stephane Eranian <[email protected]>
+#
+
+obj-$(CONFIG_PERFMON) += perfmon.o
+obj-$(CONFIG_X86_64_PERFMON_AMD64) += perfmon_amd64.o
+obj-$(CONFIG_X86_64_PERFMON_EM64T) += perfmon_p4.o
+obj-$(CONFIG_X86_64_PERFMON_EM64T_PEBS) += perfmon_em64t_pebs_smpl.o
+
+perfmon-$(subst m,y,$(CONFIG_PERFMON)) += ../../i386/perfmon/perfmon.o
+perfmon_p4-$(subst m,y,$(CONFIG_X86_64_PERFMON_EM64T)) += ../../i386/perfmon/perfmon_p4.o
--- linux-2.6.17.9.base/arch/x86_64/perfmon/perfmon_amd64.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/arch/x86_64/perfmon/perfmon_amd64.c 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,124 @@
+/*
+ * This file contains the AMD64 PMU register
+ * description tables. It supports 32 and 64-bit modes.
+ *
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[email protected]>
+ */
+#include <linux/module.h>
+#include <linux/perfmon.h>
+
+MODULE_AUTHOR("Stephane Eranian <[email protected]>");
+MODULE_DESCRIPTION("AMD64 PMU description table");
+MODULE_LICENSE("GPL");
+
+static struct pfm_arch_pmu_info pfm_amd64_pmu_info={
+ .pmc_addrs = {
+ {{MSR_K7_EVNTSEL0, 0}, 0, PFM_REGT_SELEN},
+ {{MSR_K7_EVNTSEL1, 0}, 1, PFM_REGT_SELEN},
+ {{MSR_K7_EVNTSEL2, 0}, 2, PFM_REGT_SELEN},
+ {{MSR_K7_EVNTSEL3, 0}, 3, PFM_REGT_SELEN},
+ },
+ .pmd_addrs = {
+ {{MSR_K7_PERFCTR0, 0}, 0, PFM_REGT_CTR},
+ {{MSR_K7_PERFCTR1, 0}, 0, PFM_REGT_CTR},
+ {{MSR_K7_PERFCTR2, 0}, 0, PFM_REGT_CTR},
+ {{MSR_K7_PERFCTR3, 0}, 0, PFM_REGT_CTR},
+ },
+ .pmu_style = PFM_X86_PMU_P6,
+};
+
+/*
+ * reserved bits must be zero
+ *
+ * - upper 32 bits are reserved
+ * - APIC enable bit is reserved (forced to 1)
+ * - bit 21 is reserved
+ */
+#define PFM_AMD64_RSVD ~((0xffffffffULL<<32) \
+ | (PFM_ONE_64<<20) \
+ | (PFM_ONE_64<<21))
+
+/*
+ * force Local APIC interrupt on overflow
+ */
+#define PFM_AMD64_VAL (PFM_ONE_64<<20)
+#define PFM_AMD64_NO64 (PFM_ONE_64<<20)
+
+static struct pfm_reg_desc pfm_amd64_pmc_desc[]={
+/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_AMD64_VAL, PFM_AMD64_RSVD, PFM_AMD64_NO64),
+/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_AMD64_VAL, PFM_AMD64_RSVD, PFM_AMD64_NO64),
+/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_AMD64_VAL, PFM_AMD64_RSVD, PFM_AMD64_NO64),
+/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_AMD64_VAL, PFM_AMD64_RSVD, PFM_AMD64_NO64),
+};
+#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
+
+static struct pfm_reg_desc pfm_amd64_pmd_desc[]={
+/* pmd0 */ PMD_D(PFM_REG_C, "PERFCTR0"),
+/* pmd1 */ PMD_D(PFM_REG_C, "PERFCTR1"),
+/* pmd2 */ PMD_D(PFM_REG_C, "PERFCTR2"),
+/* pmd3 */ PMD_D(PFM_REG_C, "PERFCTR3")
+};
+#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
+
+static int pfm_amd64_probe_pmu(void)
+{
+ PFM_INFO("family=%d x86_model=%d",
+ cpu_data->x86, cpu_data->x86_model);
+
+ if (cpu_data->x86 != 15) {
+ PFM_INFO("unsupported family=%d", cpu_data->x86);
+ return -1;
+ }
+
+ if (cpu_data->x86_vendor != X86_VENDOR_AMD) {
+ PFM_INFO("not an AMD processor");
+ return -1;
+ }
+
+ /*
+ * check for local APIC (required)
+ */
+ if (!cpu_has_apic) {
+ PFM_INFO("no local APIC, unsupported");
+ return -1;
+ }
+ return 0;
+}
+
+static struct pfm_pmu_config pfm_amd64_pmu_conf={
+ .pmu_name = "AMD64",
+ .counter_width = 47,
+ .pmd_desc = pfm_amd64_pmd_desc,
+ .pmc_desc = pfm_amd64_pmc_desc,
+ .num_pmc_entries = PFM_AMD_NUM_PMCS,
+ .num_pmd_entries = PFM_AMD_NUM_PMDS,
+ .probe_pmu = pfm_amd64_probe_pmu,
+ .version = "1.0",
+ .arch_info = &pfm_amd64_pmu_info,
+ .flags = PFM_PMU_BUILTIN_FLAG,
+ .owner = THIS_MODULE
+};
+
+static int __init pfm_amd64_pmu_init_module(void)
+{
+ unsigned int i;
+
+ /*
+ * XXX: could be hardcoded for this PMU model
+ */
+ bitmap_zero(ulp(pfm_amd64_pmu_info.enable_mask), PFM_MAX_HW_PMCS);
+ for(i=0; i < PFM_MAX_HW_PMCS; i++) {
+ if (pfm_amd64_pmu_info.pmc_addrs[i].reg_type & PFM_REGT_SELEN)
+ pfm_bv_set(pfm_amd64_pmu_info.enable_mask, i);
+ }
+ return pfm_register_pmu_config(&pfm_amd64_pmu_conf);
+}
+
+static void __exit pfm_amd64_pmu_cleanup_module(void)
+{
+ pfm_unregister_pmu_config(&pfm_amd64_pmu_conf);
+}
+
+module_init(pfm_amd64_pmu_init_module);
+module_exit(pfm_amd64_pmu_cleanup_module);
--- linux-2.6.17.9.base/arch/x86_64/perfmon/perfmon_em64t_pebs_smpl.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/arch/x86_64/perfmon/perfmon_em64t_pebs_smpl.c 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[email protected]>
+ *
+ * This file implements the PEBS sampling format for Intel
+ * EM64T Intel Pentium 4/Xeon processors. It does not work
+ * with Intel 32-bit P4/Xeon processors.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sysctl.h>
+#include <asm/msr.h>
+
+#include <linux/perfmon.h>
+#include <asm/perfmon_em64t_pebs_smpl.h>
+
+#ifndef __x86_64__
+#error "this module is for the Intel EM64T processors"
+#endif
+
+MODULE_AUTHOR("Stephane Eranian <[email protected]>");
+MODULE_DESCRIPTION("Intel 64-bit P4/Xeon PEBS sampling");
+MODULE_LICENSE("GPL");
+
+static int pfm_pebs_fmt_validate(u32 flags, u16 npmds, void *data)
+{
+ struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+ struct pfm_em64t_pebs_smpl_arg *arg = data;
+ size_t min_buf_size;
+
+ /*
+ * host CPU does not have PEBS support
+ */
+ if ((arch_info->flags & PFM_X86_PMU_PEBS) == 0) {
+ PFM_DBG("host PMU does not support PEBS sampling");
+ return -EINVAL;
+ }
+
+ /*
+ * need to define at least the size of the buffer
+ */
+ if (data == NULL) {
+ PFM_DBG("no argument passed");
+ return -EINVAL;
+ }
+
+ /*
+ * compute min buf size. npmds is the maximum number
+ * of implemented PMD registers.
+ */
+ min_buf_size = sizeof(struct pfm_em64t_pebs_smpl_hdr) + sizeof(struct pfm_em64t_pebs_smpl_entry);
+
+ PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
+ flags,
+ min_buf_size,
+ arg->buf_size);
+
+ /*
+ * must hold at least the buffer header + one minimally sized entry
+ */
+ if (arg->buf_size < min_buf_size)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int pfm_pebs_fmt_get_size(unsigned int flags, void *data, size_t *size)
+{
+ struct pfm_em64t_pebs_smpl_arg *arg = data;
+
+ /*
+ * size has been validated in pebs_fmt_validate()
+ */
+ *size = arg->buf_size + 256;
+
+ return 0;
+}
+
+static int pfm_pebs_fmt_init(struct pfm_context *ctx, void *buf,
+ u32 flags, u16 npmds, void *data)
+{
+ struct pfm_arch_context *ctx_arch;
+ struct pfm_em64t_pebs_smpl_hdr *hdr;
+ struct pfm_em64t_pebs_smpl_arg *arg = data;
+ unsigned long base;
+ struct pfm_em64t_ds_area *ds;
+
+ ctx_arch = pfm_ctx_arch(ctx);
+
+ hdr = buf;
+ ds = &hdr->hdr_ds;
+
+ hdr->hdr_version = PFM_EM64T_PEBS_SMPL_VERSION;
+ hdr->hdr_buf_size = arg->buf_size;
+ hdr->hdr_overflows = 0;
+
+ /*
+ * align base
+ */
+ base = ((unsigned long)(hdr+1) + 256) & ~0xffUL;
+ hdr->hdr_start_offs = base - (unsigned long)(hdr+1);
+ ds->pebs_buf_base = base;
+ ds->pebs_abs_max = base + arg->buf_size + 1;
+ ds->pebs_intr_thres = base + arg->intr_thres*sizeof(struct pfm_em64t_pebs_smpl_entry);
+ ds->pebs_index = base;
+
+ /*
+ * save counter reset value for IQ_CCCR4 (thread0) or IQ_CCCR5 (thread1)
+ */
+ ds->pebs_cnt_reset = arg->cnt_reset;
+
+ /*
+ * Keep track of DS Area
+ */
+ ctx_arch->ds_area = ds;
+
+ PFM_DBG("buffer=%p buf_size=%zu ctx_flags=0x%x"
+ "pebs_base=0x%llx pebs_max=0x%llx "
+ "pebs_thres=0x%llx cnt_reset=0x%llx",
+ buf,
+ hdr->hdr_buf_size,
+ ctx_arch->flags,
+ ds->pebs_buf_base,
+ ds->pebs_abs_max,
+ ds->pebs_intr_thres,
+ ds->pebs_cnt_reset);
+
+ return 0;
+}
+
+static int pfm_pebs_fmt_handler(void *buf, struct pfm_ovfl_arg *arg,
+ unsigned long ip, u64 tstamp, void *data)
+{
+ struct pfm_em64t_pebs_smpl_hdr *hdr;
+
+ hdr = buf;
+
+ PFM_DBG_ovfl("buffer full");
+ /*
+ * increment number of buffer overflows.
+ * important to detect duplicate set of samples.
+ */
+ hdr->hdr_overflows++;
+
+ /*
+ * request notification and masking of monitoring.
+ * Notification is still subject to the overflowed
+ * register having the FL_NOTIFY flag set.
+ */
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY| PFM_OVFL_CTRL_MASK;
+
+ return -ENOBUFS; /* we are full, sorry */
+}
+
+static int pfm_pebs_fmt_restart(int is_active, pfm_flags_t *ovfl_ctrl, void *buf)
+{
+ struct pfm_em64t_pebs_smpl_hdr *hdr = buf;
+
+ /*
+ * reset index to base of buffer
+ */
+ hdr->hdr_ds.pebs_index = hdr->hdr_ds.pebs_buf_base;
+
+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
+
+ return 0;
+}
+
+static int pfm_pebs_fmt_exit(void *buf)
+{
+ return 0;
+}
+
+static struct pfm_smpl_fmt pebs_fmt={
+ .fmt_name = "P4-PEBS-em64t",
+ .fmt_uuid = PFM_EM64T_PEBS_SMPL_UUID,
+ .fmt_arg_size = sizeof(struct pfm_em64t_pebs_smpl_arg),
+ .fmt_validate = pfm_pebs_fmt_validate,
+ .fmt_getsize = pfm_pebs_fmt_get_size,
+ .fmt_init = pfm_pebs_fmt_init,
+ .fmt_handler = pfm_pebs_fmt_handler,
+ .fmt_restart = pfm_pebs_fmt_restart,
+ .fmt_exit = pfm_pebs_fmt_exit,
+ .fmt_flags = PFM_FMT_BUILTIN_FLAG,
+ .owner = THIS_MODULE
+};
+
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1<<12) /* PEBS unavailable */
+#define cpu_has_dts boot_cpu_has(X86_FEATURE_DTES)
+
+static int __init pfm_pebs_fmt_init_module(void)
+{
+ int low, high;
+
+ if (!cpu_has_dts) {
+ PFM_INFO("processor does not have Data Save Area (DS)");
+ return -1;
+ }
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+
+ if (low & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) {
+ PFM_INFO("processor does not support PEBS");
+ return -1;
+ }
+ return pfm_register_smpl_fmt(&pebs_fmt);
+}
+
+static void __exit pfm_pebs_fmt_cleanup_module(void)
+{
+ pfm_unregister_smpl_fmt(pebs_fmt.fmt_uuid);
+}
+
+module_init(pfm_pebs_fmt_init_module);
+module_exit(pfm_pebs_fmt_cleanup_module);
--- linux-2.6.17.9.base/include/asm-x86_64/perfmon.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/include/asm-x86_64/perfmon.h 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,10 @@
+/*
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[email protected]>
+ *
+ * This file contains X86-64 Processor Family specific definitions
+ * for the perfmon interface.
+ *
+ * This file MUST never be included directly. Use linux/perfmon.h.
+ */
+#include <asm-i386/perfmon.h>
--- linux-2.6.17.9.base/include/asm-x86_64/perfmon_em64t_pebs_smpl.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/include/asm-x86_64/perfmon_em64t_pebs_smpl.h 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[email protected]>
+ *
+ * This file implements the PEBS sampling format for 64-bit
+ * Intel EM64T Pentium 4/Xeon processors. Not to be used with
+ * 32-bit Intel processors.
+ */
+#ifndef __PERFMON_EM64T_PEBS_SMPL_H__
+#define __PERFMON_EM64T_PEBS_SMPL_H__ 1
+
+#define PFM_EM64T_PEBS_SMPL_UUID { \
+ 0x36, 0xbe, 0x97, 0x94, 0x1f, 0xbf, 0x41, 0xdf,\
+ 0xb4, 0x63, 0x10, 0x62, 0xeb, 0x72, 0x9b, 0xad}
+
+/*
+ * format specific parameters (passed at context creation)
+ *
+ * intr_thres: index from start of buffer of entry where the
+ * PMU interrupt must be triggered. It must be several samples
+ * short of the end of the buffer.
+ */
+struct pfm_em64t_pebs_smpl_arg {
+ size_t buf_size; /* size of the buffer in bytes */
+ size_t intr_thres; /* index of interrupt threshold entry */
+ u32 flags; /* buffer specific flags */
+ u64 cnt_reset; /* counter reset value */
+ u32 res1; /* for future use */
+ u64 reserved[2]; /* for future use */
+};
+
+/*
+ * combined context+format specific structure. Can be passed
+ * to pfm_context_create()
+ */
+struct pfm_em64t_pebs_smpl_ctx_arg {
+ struct pfarg_ctx ctx_arg;
+ struct pfm_em64t_pebs_smpl_arg buf_arg;
+};
+
+/*
+ * DS Save Area as described in section 15.10.5 for
+ * 32-bit but extended to 64-bit
+ */
+struct pfm_em64t_ds_area {
+ u64 bts_buf_base;
+ u64 bts_index;
+ u64 bts_abs_max;
+ u64 bts_intr_thres;
+ u64 pebs_buf_base;
+ u64 pebs_index;
+ u64 pebs_abs_max;
+ u64 pebs_intr_thres;
+ u64 pebs_cnt_reset;
+};
+
+/*
+ * This header is at the beginning of the sampling buffer returned to the user.
+ *
+ * Because of PEBS alignement constraints, the actual PEBS buffer area does
+ * not necessarily begin right after the header. The hdr_start_offs must be
+ * used to compute the first byte of the buffer. The offset is defined as
+ * the number of bytes between the end of the header and the beginning of
+ * the buffer. As such the formula is:
+ * actual_buffer = (unsigned long)(hdr+1)+hdr->hdr_start_offs
+ */
+struct pfm_em64t_pebs_smpl_hdr {
+ u64 hdr_overflows; /* #overflows for buffer */
+ size_t hdr_buf_size; /* bytes in the buffer */
+ size_t hdr_start_offs; /* actual buffer start offset */
+ u32 hdr_version; /* smpl format version */
+ u64 hdr_res[3]; /* for future use */
+ struct pfm_em64t_ds_area hdr_ds; /* DS management Area */
+};
+
+/*
+ * EM64T PEBS record format as described in
+ * http://www.intel.com/technology/64bitextensions/30083502.pdf
+ */
+struct pfm_em64t_pebs_smpl_entry {
+ u64 eflags;
+ u64 ip;
+ u64 eax;
+ u64 ebx;
+ u64 ecx;
+ u64 edx;
+ u64 esi;
+ u64 edi;
+ u64 ebp;
+ u64 esp;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+};
+
+#define PFM_EM64T_PEBS_SMPL_VERSION_MAJ 1U
+#define PFM_EM64T_PEBS_SMPL_VERSION_MIN 0U
+#define PFM_EM64T_PEBS_SMPL_VERSION (((PFM_EM64T_PEBS_SMPL_VERSION_MAJ&0xffff)<<16)|\
+ (PFM_EM64T_PEBS_SMPL_VERSION_MIN & 0xffff))
+
+#endif /* __PERFMON_EM64T_PEBS_SMPL_H__ */
Stephane Eranian <[email protected]> writes:
Earlier comment about logical pieces applies too.
>
> --- linux-2.6.17.9.base/arch/x86_64/perfmon/Kconfig 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.17.9/arch/x86_64/perfmon/Kconfig 2006-08-21 03:37:46.000000000 -0700
> @@ -0,0 +1,39 @@
> +menu "Hardware Performance Monitoring support"
> +config PERFMON
> + bool "Perfmon2 performance monitoring interface"
> + select X86_LOCAL_APIC
> + default y
No default y please unless the kernel doesn't boot without it.
> + help
> + Enables the perfmon2 interface to access the hardware
> + performance counters. See <http://perfmon2.sf.net/> for
> + more details. If you're unsure, say Y.
> +
> +config X86_64_PERFMON_AMD64
> + tristate "Support 64-bit mode AMD64 hardware performance counters"
> + depends on PERFMON
> + default m
No default m please. If someone just presses return in make oldconfig
with a new kernel they don't want all kinds of new random optional drivers.
I think I would prefer to call it _K8, because in theory new AMD CPUs
might have difference performance counters.
> + help
> + Enables support for 64-bit mode AMD64 hardware performance
> + counters. Does not work with Intel EM64T processors.
> + If unsure, say m.
I would drop the if unsure ... too
> +
> +config X86_64_PERFMON_EM64T
> + tristate "Support Intel EM64T hardware performance counters"
> + depends on PERFMON
> + default m
> + help
> + Enables support for the Intel EM64T hardware performance
> + counters. Does not work with AMD64 processors.
> + If unsure, say m.
Does that include the Core 2 support that you had in the i386 patch?
In general I would prefer to call it P4, not EM64T which is just
a generic architecture name and at least on P4 performance counters
are not really architected yet.
> +
> + if (cpu_data->x86 != 15) {
> + PFM_INFO("unsupported family=%d", cpu_data->x86);
> + return -1;
> + }
> +
> + if (cpu_data->x86_vendor != X86_VENDOR_AMD) {
> + PFM_INFO("not an AMD processor");
> + return -1;
> + }
Doing the checks the other way round would be more logical.
> + *
> + * This file implements the PEBS sampling format for Intel
> + * EM64T Intel Pentium 4/Xeon processors. It does not work
> + * with Intel 32-bit P4/Xeon processors.
Why not anyways? The registers are basically the same. What's so different
in 64bit? oprofile shares that code too.
The file seems a bit underdocumented. At least some brief description
what PEBS is and maybe at least one sentence for each function?
> + */
> +#ifndef __PERFMON_EM64T_PEBS_SMPL_H__
> +#define __PERFMON_EM64T_PEBS_SMPL_H__ 1
> +
> +#define PFM_EM64T_PEBS_SMPL_UUID { \
> + 0x36, 0xbe, 0x97, 0x94, 0x1f, 0xbf, 0x41, 0xdf,\
> + 0xb4, 0x63, 0x10, 0x62, 0xeb, 0x72, 0x9b, 0xad}
What does it need the UUID for?
> +
> +/*
> + * format specific parameters (passed at context creation)
> + *
> + * intr_thres: index from start of buffer of entry where the
> + * PMU interrupt must be triggered. It must be several samples
> + * short of the end of the buffer.
> + */
> +struct pfm_em64t_pebs_smpl_arg {
> + size_t buf_size; /* size of the buffer in bytes */
> + size_t intr_thres; /* index of interrupt threshold entry */
> + u32 flags; /* buffer specific flags */
> + u64 cnt_reset; /* counter reset value */
> + u32 res1; /* for future use */
> + u64 reserved[2]; /* for future use */
I hope you double checked the alignment comes up everywhere correctly.
u64 alignment is different on the 32bit and 64bit ABIs. That can screw
Normally it's safer to use aligned_u64 on files that can be used on
32bit too, because that avoids that problem.
Where is the actual code that implements the code that you hooked
into arch/x86_64/*? I must have missed that.
-Andi
Andi,
On Wed, Aug 23, 2006 at 12:19:44PM +0200, Andi Kleen wrote:
> > +
> > +config X86_64_PERFMON_EM64T
> > + tristate "Support Intel EM64T hardware performance counters"
> > + depends on PERFMON
> > + default m
> > + help
> > + Enables support for the Intel EM64T hardware performance
> > + counters. Does not work with AMD64 processors.
> > + If unsure, say m.
>
> Does that include the Core 2 support that you had in the i386 patch?
>
This overs P4 in 64-bit mode only!
What you have in i386 is the architectural perfmon support as documented
by Intel for Core Duo/Core Solo and possibly others. Core 2 is something
different but hopfully backward compatible with this.
> In general I would prefer to call it P4, not EM64T which is just
> a generic architecture name and at least on P4 performance counters
> are not really architected yet.
>
Agreed.
>
> > + *
> > + * This file implements the PEBS sampling format for Intel
> > + * EM64T Intel Pentium 4/Xeon processors. It does not work
> > + * with Intel 32-bit P4/Xeon processors.
>
> Why not anyways? The registers are basically the same. What's so different
> in 64bit? oprofile shares that code too.
>
Well, no. the PEBS record format is differnet between 32 and 64-bit mode.
In 64-bit they add r8-r15 in each sample. The rest of the logic is
exactly the same. Until now, I have kept the 32 and 64 bit format
completely separate. But I will merge them in my next patch to cut
down on the amount of code.
> The file seems a bit underdocumented. At least some brief description
> what PEBS is and maybe at least one sentence for each function?
>
> > + */
> > +#ifndef __PERFMON_EM64T_PEBS_SMPL_H__
> > +#define __PERFMON_EM64T_PEBS_SMPL_H__ 1
> > +
> > +#define PFM_EM64T_PEBS_SMPL_UUID { \
> > + 0x36, 0xbe, 0x97, 0x94, 0x1f, 0xbf, 0x41, 0xdf,\
> > + 0xb4, 0x63, 0x10, 0x62, 0xeb, 0x72, 0x9b, 0xad}
>
> What does it need the UUID for?
>
Every sampling format is identified by a UUID. This is how an
application can identify the format it wants to use when it
creates a context.
> > +
> > +/*
> > + * format specific parameters (passed at context creation)
> > + *
> > + * intr_thres: index from start of buffer of entry where the
> > + * PMU interrupt must be triggered. It must be several samples
> > + * short of the end of the buffer.
> > + */
> > +struct pfm_em64t_pebs_smpl_arg {
> > + size_t buf_size; /* size of the buffer in bytes */
> > + size_t intr_thres; /* index of interrupt threshold entry */
> > + u32 flags; /* buffer specific flags */
> > + u64 cnt_reset; /* counter reset value */
> > + u32 res1; /* for future use */
> > + u64 reserved[2]; /* for future use */
>
> I hope you double checked the alignment comes up everywhere correctly.
> u64 alignment is different on the 32bit and 64bit ABIs. That can screw
>
> Normally it's safer to use aligned_u64 on files that can be used on
> 32bit too, because that avoids that problem.
As of now this file (perfmon_em64t_pebs_smpl.c) is only compiled in
64-bit mode. Once I merge with 32-bit I will force types to avoid
alignment problems.
> Where is the actual code that implements the code that you hooked
> into arch/x86_64/*? I must have missed that.
>
It is in the patch I call modified x86_64 files.
Thanks for you quick and valuable feedback.
I will make the change you suggested on this part.
--
-Stephane
Andi,
On Wed, Aug 23, 2006 at 12:19:44PM +0200, Andi Kleen wrote:
> > +config X86_64_PERFMON_AMD64
> > + tristate "Support 64-bit mode AMD64 hardware performance counters"
> > + depends on PERFMON
> > + default m
>
> No default m please. If someone just presses return in make oldconfig
> with a new kernel they don't want all kinds of new random optional drivers.
>
> I think I would prefer to call it _K8, because in theory new AMD CPUs
> might have difference performance counters.
>
I have a second thought on this. AMD has architected the performance counters.
Their specification is not part of a model specific documentation but
part of the AMD64 architecure. If they were to radically change the
way performance counters are defined, then the processor would violate the
architecture. They can certainely extened the architecture, e.g., have more
than 4 counters. Those new counters could be model-specific, in which case
I would create a model-specific PMU description for it. If they want
to extened the capability for all new processors, I think a cleaner
way would be to update the architecture and then we could update the
descsription we have.
What I don't not quite understand with the K7, K8 terminology is the
relation/dependencies with the AMD64 architecture specification.
--
-Stephane
> I have a second thought on this. AMD has architected the performance counters.
Quote:
>>
Implementations are not required to support the performance
c o u n t e rs and the event-select registers, or the time-stamp
counter. The presence of these features can be determined by
<<
Also all code I've seen checked the family at least.
> Their specification is not part of a model specific documentation but
> part of the AMD64 architecure.
The high level specification is, but not the actual counters for once.
> What I don't not quite understand with the K7, K8 terminology is the
> relation/dependencies with the AMD64 architecture specification.
AMD64 gives a high level register format, K7/K8 is the actual list
of performance counters.
-Andi
Andi,
On Wed, Aug 23, 2006 at 01:22:44PM +0200, Andi Kleen wrote:
>
> > I have a second thought on this. AMD has architected the performance counters.
>
> Quote:
> >>
> Implementations are not required to support the performance
> c o u n t e rs and the event-select registers, or the time-stamp
> counter. The presence of these features can be determined by
> <<
>
At the end of this paragraph then mention using CPUID to determine
the presence of the counters. AFAIK, there is no feature bit
covering performance monitoring. Does that mean we are left
with having to check the family and model number just like on
Intel?
> Also all code I've seen checked the family at least.
>
>
> > Their specification is not part of a model specific documentation but
> > part of the AMD64 architecure.
>
> The high level specification is, but not the actual counters for once.
>
> > What I don't not quite understand with the K7, K8 terminology is the
> > relation/dependencies with the AMD64 architecture specification.
> AMD64 gives a high level register format, K7/K8 is the actual list
> of performance counters.
Ok, I think I understand now:
1/ Bios and Kernel Developer Guide from Ahtlon64 and Opteron 64 is
what you are talking about with K7/K8
2/ AMD64 Architecture Programmer's Manual is the generic AMD64 description
So in theory, we should have:
- a generic PMU description for the architected PMU as described in 2/
- a K7/K8 PMU description table for Athlon64 and Opteron64 as described in 1/
AFAIK, K7/K8 do not add anything to the architected PMU. I'll rename what we have to perfmon_k8.c
to make it more explicit.
--
-Stephane
On Wednesday 23 August 2006 14:14, Stephane Eranian wrote:
[adding [email protected] so that possibly AMD people can comment]
> On Wed, Aug 23, 2006 at 01:22:44PM +0200, Andi Kleen wrote:
> >
> > > I have a second thought on this. AMD has architected the performance counters.
> >
> > Quote:
> > >>
> > Implementations are not required to support the performance
> > c o u n t e rs and the event-select registers, or the time-stamp
> > counter. The presence of these features can be determined by
> > <<
> >
> At the end of this paragraph then mention using CPUID to determine
> the presence of the counters. AFAIK, there is no feature bit
> covering performance monitoring. Does that mean we are left
> with having to check the family and model number just like on
> Intel?
Yes I puzzled over that too. Maybe they meant the MSR CPUID bits, but most likely
it was a mistake by the tech writer.
Yes I think you have to. Only checking vendor/family should be fine though -- i am not
aware of performance counter variations between models.
Perhaps add a force argument again that disables the family check too.
> Ok, I think I understand now:
> 1/ Bios and Kernel Developer Guide from Ahtlon64 and Opteron 64 is
> what you are talking about with K7/K8
Well K8.
K7 has a different one. But ok. I think you don't try to support K7 at all
currently (it has the same register format as K8, but the list of counters
is different)
> 2/ AMD64 Architecture Programmer's Manual is the generic AMD64 description
Yep
-Andi
Andi,
On Wed, Aug 23, 2006 at 02:29:04PM +0200, Andi Kleen wrote:
> On Wednesday 23 August 2006 14:14, Stephane Eranian wrote:
>
> > On Wed, Aug 23, 2006 at 01:22:44PM +0200, Andi Kleen wrote:
> > >
> > > > I have a second thought on this. AMD has architected the performance counters.
> > >
> > > Quote:
> > > >>
> > > Implementations are not required to support the performance
> > > c o u n t e rs and the event-select registers, or the time-stamp
> > > counter. The presence of these features can be determined by
> > > <<
> > >
> > At the end of this paragraph then mention using CPUID to determine
> > the presence of the counters. AFAIK, there is no feature bit
> > covering performance monitoring. Does that mean we are left
> > with having to check the family and model number just like on
> > Intel?
>
> Yes I puzzled over that too. Maybe they meant the MSR CPUID bits, but most likely
> it was a mistake by the tech writer.
>
> Yes I think you have to. Only checking vendor/family should be fine though -- i am not
> aware of performance counter variations between models.
Today, I am just checking for family 15. If they have variations it could be with
the low power (laptop) models where counters may not be present at all.
> Perhaps add a force argument again that disables the family check too.
>
> > Ok, I think I understand now:
> > 1/ Bios and Kernel Developer Guide from Ahtlon64 and Opteron 64 is
> > what you are talking about with K7/K8
>
> Well K8.
>
> K7 has a different one. But ok. I think you don't try to support K7 at all
> currently (it has the same register format as K8, but the list of counters
> is different)
>
What is the "commercial name" of K7 processors?
--
-Stephane
> If they have variations it could be with
> the low power (laptop) models where counters may not be present at all.
It would surprise me if they ever released anything again with no counters.
It's mainly simulators where the counters are missing (e.g. SimNow is a pretty
accurate simulation otherwise, but doesn't have performance counters)
>
> > Perhaps add a force argument again that disables the family check too.
> >
> > > Ok, I think I understand now:
> > > 1/ Bios and Kernel Developer Guide from Ahtlon64 and Opteron 64 is
> > > what you are talking about with K7/K8
> >
> > Well K8.
> >
> > K7 has a different one. But ok. I think you don't try to support K7 at all
> > currently (it has the same register format as K8, but the list of counters
> > is different)
> >
> What is the "commercial name" of K7 processors?
Athlon/Athlon XP/Athlon MP/Sempron (early models, later are K8)/Duron and some
of the Geodes are K7 based too.
-Andi
Andi,
On Wed, Aug 23, 2006 at 03:44:26PM +0200, Andi Kleen wrote:
> > If they have variations it could be with
> > the low power (laptop) models where counters may not be present at all.
>
> It would surprise me if they ever released anything again with no counters.
>
> It's mainly simulators where the counters are missing (e.g. SimNow is a pretty
> accurate simulation otherwise, but doesn't have performance counters)
>
Ah yes simulators are a good example. Yet what matters is whether or not they
would fault on access to the MSR. It is okay if they implement the MSR but
they do not count anything, nor trigger interrupts. The IA-64 ski simulator is
like this and this is just fine, all you get is zeroes and you cannot sample.
--
-Stephane
On Wed, Aug 23, 2006 at 03:29:32AM -0700, Stephane Eranian wrote:
> > > +#define PFM_EM64T_PEBS_SMPL_UUID { \
> > > + 0x36, 0xbe, 0x97, 0x94, 0x1f, 0xbf, 0x41, 0xdf,\
> > > + 0xb4, 0x63, 0x10, 0x62, 0xeb, 0x72, 0x9b, 0xad}
> >
> > What does it need the UUID for?
> >
> Every sampling format is identified by a UUID. This is how an
> application can identify the format it wants to use when it
> creates a context.
Please use a string name, just like every other interface for
such selections.
Christoph,
On Wed, Aug 23, 2006 at 04:25:55PM +0100, Christoph Hellwig wrote:
> On Wed, Aug 23, 2006 at 03:29:32AM -0700, Stephane Eranian wrote:
> > > > +#define PFM_EM64T_PEBS_SMPL_UUID { \
> > > > + 0x36, 0xbe, 0x97, 0x94, 0x1f, 0xbf, 0x41, 0xdf,\
> > > > + 0xb4, 0x63, 0x10, 0x62, 0xeb, 0x72, 0x9b, 0xad}
> > >
> > > What does it need the UUID for?
> > >
> > Every sampling format is identified by a UUID. This is how an
> > application can identify the format it wants to use when it
> > creates a context.
>
> Please use a string name, just like every other interface for
> such selections.
What are the advantages at the syscall level (i.e., copy_user,
format parsing) compared to a 16-entry byte array?
We are passing that uuid into a struct:
struct pfarg_ctx {
__u8 ctx_smpl_buf_id[16];
__u32 ctx_flags;
__s32 ctx_fd;
__u64 ctx_smpl_buf_size;
__u64 ctx_reserved3[12];
};
--
-Stephane
On Wed, Aug 23, 2006 at 08:53:21AM -0700, Stephane Eranian wrote:
> > Please use a string name, just like every other interface for
> > such selections.
>
> What are the advantages at the syscall level (i.e., copy_user,
> format parsing) compared to a 16-entry byte array?
None. In fact for a machine it doesn't matter whether you parse
an char foo[16] array as uuid or string. The string is extremly
useful when a human enters a game, e.g. when debugging or
trouble-shooting things. (or just reading the code)