This patchset supports the new profiling hardware available in the latest AMD CPUs in the oProfile driver.
These new AMD processors CPUs support Instruction Based Sampling (IBS). See
Instruction-Based Sampling: A New Performance Analysis Technique
for AMD Family 10h Processors, November 19, 2007
http://developer.amd.com/assets/AMD_IBS_paper_EN.pdf
for more information about IBS.
IBS support requires changes to the oProfile driver to gather this information and initialize the new MSRs associated with these new features.
This patch adds 2 new types of Profiling samples IBS_FETCH and IBS_OP to the per CPU buffers and the event buffers of the oProfile driver.
It also adds new control entries to /dev/oprofile to control IBS sampling.
These changes are backward compatible with the previous PMC only version of the driver, and a separate patch is available to oProfile 0.9.3 to use this new data.
These changes have been extensively tested at AMD on Family10h systems.
Barry Kasindorf [email protected]
Signed-off-by: Barry Kasindorf <[email protected]>
---
nmi_int.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
op_counter.h | 16 +++++++++++++---
2 files changed, 58 insertions(+), 7 deletions(-)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index cc48d3f..32a5e8e 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -218,6 +218,11 @@ static int nmi_setup(void)
}
}
+
+ /* setup AMD Family10h+ IBS irq if needed */
+ if (IBS_avail())
+ setup_ibs_nmi();
+
on_each_cpu(nmi_save_registers, NULL, 0, 1);
on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
nmi_enabled = 1;
@@ -275,6 +280,10 @@ static void nmi_shutdown(void)
unregister_die_notifier(&profile_exceptions_nb);
model->shutdown(msrs);
free_msrs();
+
+ /* clear AMD Family10h+ IBS irq if needed */
+ if (IBS_avail())
+ clear_ibs_nmi();
}
static void nmi_cpu_start(void *dummy)
@@ -301,15 +310,14 @@ static void nmi_stop(void)
}
struct op_counter_config counter_config[OP_MAX_COUNTER];
+struct op_ibs_config ibs_config;
static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
unsigned int i;
-
+ struct dentry *dir;
for (i = 0; i < model->num_counters; ++i) {
- struct dentry *dir;
char buf[4];
-
/* quick little hack to _not_ expose a counter if it is not
* available for use. This should protect userspace app.
* NOTE: assumes 1:1 mapping here (that counters are organized
@@ -328,6 +336,33 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
}
+ /* Setup AMD Family10h+ IBS control if needed */
+ if (IBS_avail()) {
+ char buf[12];
+
+ /* setup some reasonable defaults */
+ ibs_config.max_cnt_fetch = 250000;
+ ibs_config.FETCH_enabled = 0;
+ ibs_config.max_cnt_op = 250000;
+ ibs_config.OP_enabled = 0;
+ ibs_config.dispatched_ops = 1;
+ snprintf(buf, sizeof(buf), "ibs_fetch");
+ dir = oprofilefs_mkdir(sb, root, buf);
+ oprofilefs_create_ulong(sb, dir, "ran_enable",
+ &ibs_config.rand_en);
+ oprofilefs_create_ulong(sb, dir, "enable",
+ &ibs_config.FETCH_enabled);
+ oprofilefs_create_ulong(sb, dir, "max_count",
+ &ibs_config.max_cnt_fetch);
+ snprintf(buf, sizeof(buf), "ibs_uops");
+ dir = oprofilefs_mkdir(sb, root, buf);
+ oprofilefs_create_ulong(sb, dir, "enable",
+ &ibs_config.OP_enabled);
+ oprofilefs_create_ulong(sb, dir, "max_count",
+ &ibs_config.max_cnt_op);
+ oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+ &ibs_config.dispatched_ops);
+ }
return 0;
}
@@ -419,9 +454,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
break;
case 0x10:
model = &op_athlon_spec;
- cpu_type = "x86-64/family10";
+ cpu_type = "x86-64/family10h";
+ break;
+ case 0x11:
+ model = &op_athlon_spec;
+ cpu_type = "x86-64/family11h";
break;
}
+ /* set global if IBS profiling is available */
+ check_IBS_avail(family);
break;
case X86_VENDOR_INTEL:
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h
index 2880b15..ddab57c 100644
--- a/arch/x86/oprofile/op_counter.h
+++ b/arch/x86/oprofile/op_counter.h
@@ -6,12 +6,12 @@
*
* @author John Levon
*/
-
+
#ifndef OP_COUNTER_H
#define OP_COUNTER_H
-
+
#define OP_MAX_COUNTER 8
-
+
/* Per-perfctr configuration as set via
* oprofilefs.
*/
@@ -26,4 +26,14 @@ struct op_counter_config {
extern struct op_counter_config counter_config[];
+struct op_ibs_config {
+ unsigned long OP_enabled;
+ unsigned long FETCH_enabled;
+ unsigned long max_cnt_fetch;
+ unsigned long max_cnt_op;
+ unsigned long rand_en;
+ unsigned long dispatched_ops;
+};
+
+extern struct op_ibs_config ibs_config;
#endif /* OP_COUNTER_H */
Signed-off-by: Barry Kasindorf <[email protected]>
---
drivers/oprofile/buffer_sync.c | 140 ++++++++++++++++++++++++++++++-----------
drivers/oprofile/cpu_buffer.c | 102 +++++++++++++++++++++++++++--
drivers/oprofile/cpu_buffer.h | 3
include/linux/oprofile.h | 13 +++
4 files changed, 213 insertions(+), 45 deletions(-)
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 9304c45..d6f3305 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -5,6 +5,7 @@
* @remark Read the file COPYING
*
* @author John Levon <[email protected]>
+ * @author Barry Kasindorf
*
* This is the core of the buffer management. Each
* CPU buffer is processed and entered into the
@@ -33,7 +34,7 @@
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
-
+
static LIST_HEAD(dying_tasks);
static LIST_HEAD(dead_tasks);
static cpumask_t marked_cpus = CPU_MASK_NONE;
@@ -99,7 +100,7 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void *
return 0;
}
-
+
/* We need to be told about new modules so we don't attribute to a previously
* loaded module, or drop the samples on the floor.
*/
@@ -118,7 +119,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v
return 0;
}
-
+
static struct notifier_block task_free_nb = {
.notifier_call = task_free_notify,
};
@@ -135,7 +136,7 @@ static struct notifier_block module_load_nb = {
.notifier_call = module_load_notify,
};
-
+
static void end_sync(void)
{
end_cpu_work();
@@ -212,10 +213,10 @@ static unsigned long get_exec_dcookie(struct mm_struct * mm)
{
unsigned long cookie = NO_COOKIE;
struct vm_area_struct * vma;
-
+
if (!mm)
goto out;
-
+
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!vma->vm_file)
continue;
@@ -241,7 +242,7 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o
struct vm_area_struct * vma;
for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
-
+
if (addr < vma->vm_start || addr >= vma->vm_end)
continue;
@@ -263,9 +264,19 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o
return cookie;
}
+static void increment_tail(struct oprofile_cpu_buffer *b)
+{
+ unsigned long new_tail = b->tail_pos + 1;
+
+ rmb(); /* be sure fifo pointers are synchromized */
+ if (new_tail < b->buffer_size)
+ b->tail_pos = new_tail;
+ else
+ b->tail_pos = 0;
+}
static unsigned long last_cookie = INVALID_COOKIE;
-
+
static void add_cpu_switch(int i)
{
add_event_entry(ESCAPE_CODE);
@@ -278,16 +289,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel)
{
add_event_entry(ESCAPE_CODE);
if (in_kernel)
- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
else
- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
+ add_event_entry(KERNEL_EXIT_SWITCH_CODE);
}
-
+
static void
add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
{
add_event_entry(ESCAPE_CODE);
- add_event_entry(CTX_SWITCH_CODE);
+ add_event_entry(CTX_SWITCH_CODE);
add_event_entry(task->pid);
add_event_entry(cookie);
/* Another code for daemon back-compat */
@@ -296,7 +307,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
add_event_entry(task->tgid);
}
-
+
static void add_cookie_switch(unsigned long cookie)
{
add_event_entry(ESCAPE_CODE);
@@ -304,13 +315,74 @@ static void add_cookie_switch(unsigned long cookie)
add_event_entry(cookie);
}
-
+
static void add_trace_begin(void)
{
add_event_entry(ESCAPE_CODE);
add_event_entry(TRACE_BEGIN_CODE);
}
+/*
+ * Add IBS fetch and op entries to event buffer
+ */
+static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
+ int in_kernel, struct mm_struct *mm)
+{
+ unsigned long rip;
+ int i, count;
+ unsigned long ibs_cookie = 0;
+ off_t offset;
+
+ increment_tail(cpu_buf); /* move to RIP entry */
+
+ rip = ((struct op_sample *)&cpu_buf->buffer[cpu_buf->tail_pos])->eip;
+
+#ifdef __LP64__
+ rip +=
+ ((struct op_sample *)&cpu_buf->buffer[cpu_buf->tail_pos])->event << 32;
+#endif
+
+ if (mm) {
+ ibs_cookie = lookup_dcookie(mm, rip, &offset);
+
+ if (ibs_cookie == NO_COOKIE)
+ offset = rip;
+ if (ibs_cookie == INVALID_COOKIE) {
+ atomic_inc(&oprofile_stats.sample_lost_no_mapping);
+ offset = rip;
+ }
+ if (ibs_cookie != last_cookie) {
+ add_cookie_switch(ibs_cookie);
+ last_cookie = ibs_cookie;
+ }
+ }
+
+ else
+ offset = rip;
+
+ add_event_entry(ESCAPE_CODE);
+ add_event_entry(code);
+ add_event_entry(offset); /* Offset from Dcookie */
+
+ /* we send the Dcookie offset, but send the raw Linear Add also*/
+ add_event_entry(
+ ((struct op_sample *)&cpu_buf->buffer[cpu_buf->tail_pos])->eip);
+ add_event_entry(
+ ((struct op_sample *)&cpu_buf->buffer[cpu_buf->tail_pos])->event);
+
+ if (code == IBS_FETCH_CODE)
+ count = 2;
+ else
+ count = 5; /*IBS OP is 5 int64s long */
+
+ for (i = 0; i < count; i++) {
+ increment_tail(cpu_buf);
+ add_event_entry(
+ ((struct op_sample *)&cpu_buf->buffer[cpu_buf->tail_pos])->eip);
+ add_event_entry(
+ ((struct op_sample *)&cpu_buf->buffer[cpu_buf->tail_pos])->event);
+ }
+}
static void add_sample_entry(unsigned long offset, unsigned long event)
{
@@ -323,9 +395,9 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
{
unsigned long cookie;
off_t offset;
-
+
cookie = lookup_dcookie(mm, s->eip, &offset);
-
+
if (cookie == INVALID_COOKIE) {
atomic_inc(&oprofile_stats.sample_lost_no_mapping);
return 0;
@@ -341,7 +413,7 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
return 1;
}
-
+
/* Add a sample to the global event buffer. If possible the
* sample is converted into a persistent dentry/offset pair
* for later lookup from userspace.
@@ -359,7 +431,7 @@ add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
}
return 0;
}
-
+
static void release_mm(struct mm_struct * mm)
{
@@ -383,7 +455,7 @@ static inline int is_code(unsigned long val)
{
return val == ESCAPE_CODE;
}
-
+
/* "acquire" as many cpu buffer slots as we can */
static unsigned long get_slots(struct oprofile_cpu_buffer * b)
@@ -412,19 +484,6 @@ static unsigned long get_slots(struct oprofile_cpu_buffer * b)
}
-static void increment_tail(struct oprofile_cpu_buffer * b)
-{
- unsigned long new_tail = b->tail_pos + 1;
-
- rmb();
-
- if (new_tail < b->buffer_size)
- b->tail_pos = new_tail;
- else
- b->tail_pos = 0;
-}
-
-
/* Move tasks along towards death. Any tasks on dead_tasks
* will definitely have no remaining references in any
* CPU buffers at this point, because we use two lists,
@@ -496,21 +555,20 @@ void sync_buffer(int cpu)
struct task_struct * new;
unsigned long cookie = 0;
int in_kernel = 1;
- unsigned int i;
sync_buffer_state state = sb_buffer_start;
unsigned long available;
mutex_lock(&buffer_mutex);
-
+
add_cpu_switch(cpu);
/* Remember, only we can modify tail_pos */
available = get_slots(cpu_buf);
- for (i = 0; i < available; ++i) {
- struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
-
+ while (get_slots(cpu_buf)) {
+ struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];
+
if (is_code(s->eip)) {
if (s->event <= CPU_IS_KERNEL) {
/* kernel/userspace switch */
@@ -521,6 +579,14 @@ void sync_buffer(int cpu)
} else if (s->event == CPU_TRACE_BEGIN) {
state = sb_bt_start;
add_trace_begin();
+ } else if (s->event == IBS_FETCH_BEGIN) {
+ state = sb_bt_start;
+ add_ibs_begin(cpu_buf,
+ IBS_FETCH_CODE, in_kernel, mm);
+ } else if (s->event == IBS_OP_BEGIN) {
+ state = sb_bt_start;
+ add_ibs_begin(cpu_buf,
+ IBS_OP_CODE, in_kernel, mm);
} else {
struct mm_struct * oldmm = mm;
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 2450b3a..b08b11d 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -5,6 +5,7 @@
* @remark Read the file COPYING
*
* @author John Levon <[email protected]>
+ * @author Barry Kasindorf <[email protected]>
*
* Each CPU has a local buffer that stores PC value/event
* pairs. We also log context switches when we notice them.
@@ -21,7 +22,7 @@
#include <linux/oprofile.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
-
+
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
@@ -37,7 +38,7 @@ static int work_enabled;
void free_cpu_buffers(void)
{
int i;
-
+
for_each_online_cpu(i)
vfree(per_cpu(cpu_buffer, i).buffer);
}
@@ -45,17 +46,17 @@ void free_cpu_buffers(void)
int alloc_cpu_buffers(void)
{
int i;
-
+
unsigned long buffer_size = fs_cpu_buffer_size;
-
+
for_each_online_cpu(i) {
struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
-
+
b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
cpu_to_node(i));
if (!b->buffer)
goto fail;
-
+
b->last_task = NULL;
b->last_is_kernel = -1;
b->tracing = 0;
@@ -202,12 +203,55 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
cpu_buf->last_task = task;
add_code(cpu_buf, (unsigned long)task);
}
-
+
add_sample(cpu_buf, pc, event);
return 1;
}
-static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
+static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf,
+ unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code)
+{
+ struct task_struct *task;
+
+ cpu_buf->sample_received++;
+
+ if (nr_available_slots(cpu_buf) < 14) {
+ cpu_buf->sample_lost_overflow++;
+ return 0;
+ }
+
+ is_kernel = !!is_kernel;
+
+ /* notice a switch from user->kernel or vice versa */
+ if (cpu_buf->last_is_kernel != is_kernel) {
+ cpu_buf->last_is_kernel = is_kernel;
+ add_code(cpu_buf, is_kernel);
+ }
+
+ /* notice a task switch */
+ if (!is_kernel) {
+ task = current;
+
+ if (cpu_buf->last_task != task) {
+ cpu_buf->last_task = task;
+ add_code(cpu_buf, (unsigned long)task);
+ }
+ }
+
+ add_code(cpu_buf, ibs_code);
+ add_sample(cpu_buf, ibs[0], ibs[1]);
+ add_sample(cpu_buf, ibs[2], ibs[3]);
+ add_sample(cpu_buf, ibs[4], ibs[5]);
+
+ if (ibs_code == IBS_OP_BEGIN) {
+ add_sample(cpu_buf, ibs[6], ibs[7]);
+ add_sample(cpu_buf, ibs[8], ibs[9]);
+ add_sample(cpu_buf, ibs[10], ibs[11]);
+ }
+
+ return 1;
+}
+static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
if (nr_available_slots(cpu_buf) < 4) {
cpu_buf->sample_lost_overflow++;
@@ -252,6 +296,48 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
oprofile_add_ext_sample(pc, regs, event, is_kernel);
}
+void oprofile_add_ibs_fetch_sample(struct pt_regs *const regs,
+ unsigned int * const ibs_fetch)
+{
+ int is_kernel = !user_mode(regs);
+ unsigned long pc = profile_pc(regs);
+
+ struct oprofile_cpu_buffer *cpu_buf =
+ &per_cpu(cpu_buffer, smp_processor_id());
+
+ if (!backtrace_depth) {
+ log_ibs_sample(cpu_buf, pc, is_kernel,
+ ibs_fetch, IBS_FETCH_BEGIN);
+ return;
+ }
+
+ /* if log_sample() fails we can't backtrace since we lost the source
+ * of this event */
+ if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_fetch, IBS_FETCH_BEGIN))
+ oprofile_ops.backtrace(regs, backtrace_depth);
+}
+
+
+void oprofile_add_ibs_op_sample(struct pt_regs *const regs,
+ unsigned int * const ibs_op)
+{
+ int is_kernel = !user_mode(regs);
+ unsigned long pc = profile_pc(regs);
+
+ struct oprofile_cpu_buffer *cpu_buf =
+ &per_cpu(cpu_buffer, smp_processor_id());
+
+ if (!backtrace_depth) {
+ log_ibs_sample(cpu_buf, pc, is_kernel, ibs_op, IBS_OP_BEGIN);
+ return;
+ }
+
+ /* if log_sample() fails we can't backtrace since we lost the source
+ * of this event */
+ if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_op, IBS_OP_BEGIN))
+ oprofile_ops.backtrace(regs, backtrace_depth);
+}
+
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index c3e366b..4fa475f 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -53,7 +53,10 @@ DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
/* transient events for the CPU buffer -> event buffer */
+#define CPU_IS_USER 0
#define CPU_IS_KERNEL 1
#define CPU_TRACE_BEGIN 2
+#define IBS_FETCH_BEGIN 3
+#define IBS_OP_BEGIN 4
#endif /* OPROFILE_CPU_BUFFER_H */
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 041bb31..6fe46f7 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -36,6 +36,9 @@
#define XEN_ENTER_SWITCH_CODE 10
#define SPU_PROFILING_CODE 11
#define SPU_CTX_SWITCH_CODE 12
+#define IBS_FETCH_CODE 13
+#define IBS_OP_CODE 14
+
struct super_block;
struct dentry;
@@ -106,6 +109,16 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
unsigned long event, int is_kernel);
+/**
+ * Add an AMD IBS sample. This may be called from any context. Pass
+ * smp_processor_id() as cpu. PAsses IBS registers as a unsigned int[8]
+ */
+void oprofile_add_ibs_op_sample(struct pt_regs * const regs,
+ unsigned int * const ibs_op);
+
+void oprofile_add_ibs_fetch_sample(struct pt_regs * const regs,
+ unsigned int * const ibs_fetch);
+
/* Use this instead when the PC value is not from the regs. Doesn't
* backtrace. */
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);
Signed-off-by: Barry Kasindorf <[email protected]>
---
arch/x86/kernel/apic_32.c | 24 +++
arch/x86/kernel/apic_64.c | 1
arch/x86/oprofile/op_model_athlon.c | 265 +++++++++++++++++++++++++++++++++++-
arch/x86/oprofile/op_x86_model.h | 42 +++++
include/asm-x86/apicdef.h | 3
5 files changed, 334 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4ed4a2b..7ca1d31 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -237,6 +237,30 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
if (!oneshot)
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+{
+ unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+ unsigned int v = (mask << 16) | (msg_type << 8) | vector;
+
+ apic_write(reg, v);
+}
+
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+ setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+ return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+ setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+ return APIC_EILVT_LVTOFF_IBS;
+}
+EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
+
/*
* Program the next event, relative to now
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 26514b5..e0dbe5f 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -229,6 +229,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
return APIC_EILVT_LVTOFF_IBS;
}
+EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
/*
* Program the next event, relative to now
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 6b9008c..23adc8e 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -123,6 +123,9 @@
#define APIC_EILVT_MSG_NMI 0x4
#define APIC_EILVT_MSG_EXT 0x7
#define APIC_EILVT_MASKED (1 << 16)
+#define APIC_EILVT_MASK_INT 1
+#define APIC_EILVT_ENA_INT 0
+
#define APIC_EILVT1 0x510
#define APIC_EILVT2 0x520
#define APIC_EILVT3 0x530
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c
index 3d53487..c5b43f1 100644
--- a/arch/x86/oprofile/op_model_athlon.c
+++ b/arch/x86/oprofile/op_model_athlon.c
@@ -8,9 +8,13 @@
* @author John Levon
* @author Philippe Elie
* @author Graydon Hoare
- */
+ * @author Barry Kasindorf
+*/
#include <linux/oprofile.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
@@ -42,7 +46,71 @@
#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
+/* high dword IbsFetchCtl[bit 49] */
+#define IBS_FETCH_VALID_BIT 0x00020000
+/* high dword IbsFetchCtl[bit 52] */
+#define IBS_FETCH_PHY_ADDR_VALID_BIT 0x00100000
+#define IBS_FETCH_CTL_HIGH_MASK 0xFFFFFFFF
+/* high dword IbsFetchCtl[bit 48] */
+#define IBS_FETCH_ENABLE 0x00010000
+#define IBS_FETCH_CTL_CNT_MASK 0x00000000FFFF0000
+#define IBS_FETCH_CTL_MAX_CNT_MASK 0x000000000000FFFF
+
+/*IbsOpCtl masks/bits */
+#define IBS_OP_VALID_BIT 0x0000000000040000 /* IbsOpCtl[bit18] */
+#define IBS_OP_ENABLE 0x0000000000020000 /* IBS_OP_ENABLE[bit17]*/
+
+/*IbsOpData masks */
+#define IBS_OP_DATA_BRANCH_MASK 0x3F00000000 /* IbsOpData[32:37] */
+#define IBS_OP_DATA_HIGH_MASK 0x0000FFFF00000000 /* IbsOpData[32:47] */
+#define IBS_OP_DATA_LOW_MASK 0x00000000FFFFFFFF /*IbsOpData[0:31] */
+
+/*IbsOpData2 masks */
+#define IBS_OP_DATA2_MASK 0x000000000000002F
+
+/*IbsOpData3 masks */
+#define IBS_OP_DATA3_LS_MASK 0x0000000003
+
+#define IBS_OP_DATA3_PHY_ADDR_VALID_BIT 0x0000000000040000
+#define IBS_OP_DATA3_LIN_ADDR_VALID_BIT 0x0000000000020000
+#define IBS_CTL_LVT_OFFSET_VALID_BIT 0x100
+/* AMD ext internal APIC Local Vectors */
+#define APIC_IELVT 0x500
+/* number of APIC Entries for ieLVT */
+#define NUM_APIC_IELVT 4
+
+/*PCI Extended Configuration Constants */
+/* Northbridge Configuration Register */
+#define NB_CFG_MSR 0xC001001F
+/* Bit 46, EnableCf8ExtCfg: enable CF8 extended configuration cycles */
+#define ENABLE_CF8_EXT_CFG_MASK 0x4000
+/* MSR to set the IBS control register APIC LVT offset */
+#define IBS_LVT_OFFSET_PCI 0x1CC
+
+/* IBS rev [bit 10] 1 = IBS Rev B */
+#define IBS_REV_MASK 0x400
+
+#define Family10H 0x10
+#define IBS_AVAILABLE_BIT 0x40
+
+/* When pci_ids.h gets caught up remove this */
+#ifndef PCI_DEVICE_ID_AMD_FAMILY10H_NB
+#define PCI_DEVICE_ID_AMD_FAMILY10H_NB 0x1200
+#endif
+
+/**
+ * Add an AMD IBS sample. This may be called from any context. Pass
+ * smp_processor_id() as cpu. Passes IBS registers as a unsigned int[8]
+ */
+void oprofile_add_ibs_op_sample(struct pt_regs * const regs,
+ unsigned int * const ibs_op);
+
+void oprofile_add_ibs_fetch_sample(struct pt_regs * const regs,
+ unsigned int * const ibs_fetch);
+
static unsigned long reset_value[NUM_COUNTERS];
+static int Extended_PCI_Enabled;
+static int ibs_allowed; /* AMD Family10h and later */
static void athlon_fill_in_addresses(struct op_msrs * const msrs)
{
@@ -118,6 +186,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
{
unsigned int low, high;
int i;
+ struct ibs_fetch_sample ibs_fetch;
+ struct ibs_op_sample ibs_op;
for (i = 0 ; i < NUM_COUNTERS; ++i) {
if (!reset_value[i])
@@ -129,6 +199,63 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
}
}
+ /*If AMD and IBS is available */
+ if (ibs_allowed && ibs_config.FETCH_enabled) {
+ rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ if (high & IBS_FETCH_VALID_BIT) {
+ ibs_fetch.ibs_fetch_ctl_high = high;
+ ibs_fetch.ibs_fetch_ctl_low = low;
+ rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
+ ibs_fetch.ibs_fetch_lin_addr_high = high;
+ ibs_fetch.ibs_fetch_lin_addr_low = low;
+ rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
+ ibs_fetch.ibs_fetch_phys_addr_high = high;
+ ibs_fetch.ibs_fetch_phys_addr_low = low;
+
+ oprofile_add_ibs_fetch_sample(regs,
+ (unsigned int *)&ibs_fetch);
+
+ /*reenable the IRQ */
+ rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ high &= ~(IBS_FETCH_VALID_BIT);
+ high |= IBS_FETCH_ENABLE;
+ low &= IBS_FETCH_CTL_MAX_CNT_MASK;
+ wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ }
+ }
+
+ if (ibs_allowed && ibs_config.OP_enabled) {
+ rdmsr(MSR_AMD64_IBSOPCTL, low, high);
+ if (low & IBS_OP_VALID_BIT) {
+ rdmsr(MSR_AMD64_IBSOPRIP, low, high);
+ ibs_op.ibs_op_rip_low = low;
+ ibs_op.ibs_op_rip_high = high;
+ rdmsr(MSR_AMD64_IBSOPDATA, low, high);
+ ibs_op.ibs_op_data1_low = low;
+ ibs_op.ibs_op_data1_high = high;
+ rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
+ ibs_op.ibs_op_data2_low = low;
+ ibs_op.ibs_op_data2_high = high;
+ rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
+ ibs_op.ibs_op_data3_low = low;
+ ibs_op.ibs_op_data3_high = high;
+ rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
+ ibs_op.ibs_dc_linear_low = low;
+ ibs_op.ibs_dc_linear_high = high;
+ rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
+ ibs_op.ibs_dc_phys_low = low;
+ ibs_op.ibs_dc_phys_high = high;
+
+ /* reenable the IRQ */
+ oprofile_add_ibs_op_sample(regs,
+ (unsigned int *)&ibs_op);
+ rdmsr(MSR_AMD64_IBSOPCTL, low, high);
+ low &= ~(IBS_OP_VALID_BIT);
+ low |= IBS_OP_ENABLE;
+ wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ }
+ }
+
/* See op_model_ppro.c */
return 1;
}
@@ -145,6 +272,17 @@ static void athlon_start(struct op_msrs const * const msrs)
CTRL_WRITE(low, high, msrs, i);
}
}
+ if (ibs_allowed && ibs_config.FETCH_enabled) {
+ low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+ high = IBS_FETCH_ENABLE;
+ wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ }
+
+ if (ibs_allowed && ibs_config.OP_enabled) {
+ low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_ENABLE;
+ high = 0;
+ wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ }
}
@@ -162,6 +300,18 @@ static void athlon_stop(struct op_msrs const * const msrs)
CTRL_SET_INACTIVE(low);
CTRL_WRITE(low, high, msrs, i);
}
+
+ if (ibs_allowed && ibs_config.FETCH_enabled) {
+ low = 0; /* clear max count and enable */
+ high = 0;
+ wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ }
+
+ if (ibs_allowed && ibs_config.OP_enabled) {
+ low = 0; /* clear max count and enable */
+ high = 0;
+ wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ }
}
static void athlon_shutdown(struct op_msrs const * const msrs)
@@ -178,6 +328,119 @@ static void athlon_shutdown(struct op_msrs const * const msrs)
}
}
+void check_IBS_avail(__u8 family)
+{
+ u32 eax, ebx, ecx, edx;
+ if (family >= Family10H) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (ecx & IBS_AVAILABLE_BIT)
+ ibs_allowed = 1;
+ else
+ ibs_allowed = 0;
+ }
+}
+
+int IBS_avail(void)
+{
+ return(ibs_allowed);
+}
+
+/*
+ * Enable AMD extended PCI config space thru IO
+ * save previous state
+ */
+static void
+ Enable_Extended_PCI_Config(void)
+{
+ unsigned int low, high;
+ rdmsr(NB_CFG_MSR, low, high);
+ Extended_PCI_Enabled = high & ENABLE_CF8_EXT_CFG_MASK;
+ high |= ENABLE_CF8_EXT_CFG_MASK;
+ wrmsr(NB_CFG_MSR, low, high);
+}
+
+/*
+ * Disable AMD extended PCI config space thru IO
+ * restore to previous state
+ */
+static void
+ Disable_Extended_PCI_Config(void)
+{
+ unsigned int low, high;
+ rdmsr(NB_CFG_MSR, low, high);
+ high &= ~ENABLE_CF8_EXT_CFG_MASK;
+ high |= Extended_PCI_Enabled;
+ wrmsr(NB_CFG_MSR, low, high);
+}
+/*
+ * Modified to use AMD extended PCI config space thru IO
+ * these 2 I/Os should be atomic but there is no easy way to do that.
+ * Should use the MMio version, will when it is fixed
+ */
+
+static void
+ PCI_Extended_Write(struct pci_dev *dev, unsigned int offset,
+ unsigned long val)
+{
+ outl(0x80000000 | (((offset >> 8) & 0x0f) << 24) |
+ ((dev->bus->number & 0xff) << 16) | ((dev->devfn | 3) << 8)
+ | (offset & 0x0fc), 0x0cf8);
+
+ outl(val, 0xcfc);
+}
+
+static inline void APIC_init_per_cpu(void *arg)
+{
+ setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, APIC_EILVT_ENA_INT);
+}
+
+static inline void APIC_clear_per_cpu(void *arg)
+{
+ setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, APIC_EILVT_MASK_INT);
+}
+
+/*
+ * initialize the APIC for the IBS interrupts
+ * if needed on AMD Family10h rev B0 and later
+ */
+void setup_ibs_nmi(void)
+{
+ struct pci_dev *gh_device = NULL;
+ u32 low, high;
+
+ /* This is a hack to get APIC_EILVT_LVTOFF_IBS */
+ unsigned long i = setup_APIC_eilvt_ibs(0, 0, 1);
+
+ /*see if the IBS control register is already set correctly*/
+ /*remove this when we know for sure it is done in the kernel init*/
+ rdmsr(MSR_AMD64_IBSCTL, low, high);
+ if ((low & (IBS_CTL_LVT_OFFSET_VALID_BIT | i)) !=
+ (IBS_CTL_LVT_OFFSET_VALID_BIT | i)) {
+ Enable_Extended_PCI_Config();
+
+ /**** Be sure to run loop until NULL is returned to
+ decrement reference count on any pci_dev structures returned ****/
+ while ((gh_device = pci_get_device(PCI_VENDOR_ID_AMD,
+ PCI_DEVICE_ID_AMD_FAMILY10H_NB, gh_device)) != NULL) {
+ /* This code may change if we can find a proper
+ * way to get at the PCI extended config space */
+ PCI_Extended_Write(
+ gh_device, IBS_LVT_OFFSET_PCI,
+ (i | IBS_CTL_LVT_OFFSET_VALID_BIT));
+ }
+ Disable_Extended_PCI_Config();
+ }
+ on_each_cpu(APIC_init_per_cpu, NULL, 1, 1);
+}
+
+/*
+ * unitialize the APIC for the IBS interrupts if needed on AMD Family10h
+ * rev B0 and later */
+void clear_ibs_nmi(void)
+{
+ on_each_cpu(APIC_clear_per_cpu, NULL, 1, 1);
+}
+
struct op_x86_model_spec const op_athlon_spec = {
.num_counters = NUM_COUNTERS,
.num_controls = NUM_CONTROLS,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 45b605f..6589703 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -26,6 +26,39 @@ struct op_msrs {
struct op_msr * controls;
};
+struct ibs_fetch_sample {
+ /* MSRC001_1031 IBS Fetch Linear Address Register */
+ unsigned int ibs_fetch_lin_addr_low;
+ unsigned int ibs_fetch_lin_addr_high;
+ /* MSRC001_1030 IBS Fetch Control Register */
+ unsigned int ibs_fetch_ctl_low;
+ unsigned int ibs_fetch_ctl_high;
+ /* MSRC001_1032 IBS Fetch Physical Address Register */
+ unsigned int ibs_fetch_phys_addr_low;
+ unsigned int ibs_fetch_phys_addr_high;
+};
+
+struct ibs_op_sample {
+ /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
+ unsigned int ibs_op_rip_low;
+ unsigned int ibs_op_rip_high;
+ /* MSRC001_1035 IBS Op Data Register */
+ unsigned int ibs_op_data1_low;
+ unsigned int ibs_op_data1_high;
+ /* MSRC001_1036 IBS Op Data 2 Register */
+ unsigned int ibs_op_data2_low;
+ unsigned int ibs_op_data2_high;
+ /* MSRC001_1037 IBS Op Data 3 Register */
+ unsigned int ibs_op_data3_low;
+ unsigned int ibs_op_data3_high;
+ /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
+ unsigned int ibs_dc_linear_low;
+ unsigned int ibs_dc_linear_high;
+ /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
+ unsigned int ibs_dc_phys_low;
+ unsigned int ibs_dc_phys_high;
+};
+
struct pt_regs;
/* The model vtable abstracts the differences between
@@ -48,4 +81,13 @@ extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_athlon_spec;
+/* setup AMD Family 10H IBS IRQ if needed */
+extern void setup_ibs_nmi(void);
+/* clearp AMD Family 10H IBS IRQ if needed */
+extern void clear_ibs_nmi(void);
+/* Look at the CPUID bits and set the IBS avail global flag */
+extern void check_IBS_avail(__u8 family);
+/* chech the IBS avail global flag */
+extern int IBS_avail(void);
+
#endif /* OP_X86_MODEL_H */
Hi!
> This patchset supports the new profiling hardware available in the latest AMD CPUs in the oProfile driver.
> @@ -419,9 +454,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
> break;
> case 0x10:
> model = &op_athlon_spec;
> - cpu_type = "x86-64/family10";
> + cpu_type = "x86-64/family10h";
> + break;
Is this string visible to userspace?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Mon 2008-06-09 13:50:48, Barry Kasindorf wrote:
> Signed-off-by: Barry Kasindorf <[email protected]>
>
missing changelog, lots of strange whitespace changes.
> @@ -33,7 +34,7 @@
> #include "event_buffer.h"
> #include "cpu_buffer.h"
> #include "buffer_sync.h"
> -
> +
> static LIST_HEAD(dying_tasks);
> static LIST_HEAD(dead_tasks);
> static cpumask_t marked_cpus = CPU_MASK_NONE;
I actually do not see the difference here. What is going on?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Pavel Machek <[email protected]> writes:
> Hi!
>
>> This patchset supports the new profiling hardware available in the latest AMD CPUs in the oProfile driver.
>
>> @@ -419,9 +454,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
>> break;
>> case 0x10:
>> model = &op_athlon_spec;
>> - cpu_type = "x86-64/family10";
>> + cpu_type = "x86-64/family10h";
>> + break;
>
> Is this string visible to userspace?
Yes it is.
-Andi
Hi,
I can add a changelog, but this is all new and the comments at the head
of the first patch detail the nature of the changes.
I don't know why all those white space changes happened, I did a GIT
diff and it found them. I suspect that there is some trailing whitespace
that got removed, I will check that.
-Barry
-----Original Message-----
From: Pavel Machek [mailto:[email protected]]
Sent: Wednesday, June 11, 2008 6:54 AM
To: Kasindorf, Barry
Cc: [email protected]
Subject: Re: [PATCH 3/3] AMD Family10h+ IBS support for oProfile driver:
buffer management
On Mon 2008-06-09 13:50:48, Barry Kasindorf wrote:
> Signed-off-by: Barry Kasindorf <[email protected]>
>
missing changelog, lots of strange whitespace changes.
> @@ -33,7 +34,7 @@
> #include "event_buffer.h"
> #include "cpu_buffer.h"
> #include "buffer_sync.h"
> -
> +
> static LIST_HEAD(dying_tasks);
> static LIST_HEAD(dead_tasks);
> static cpumask_t marked_cpus = CPU_MASK_NONE;
I actually do not see the difference here. What is going on?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures)
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Yes,
This is visible to userspace. The corresponding changes to the usermode
code have been sent to the oProfile list. The confusion is that it is
not AMD Family 10 but it is Family 16(10h). This can be confusing so we
changed it.
-Barry
-----Original Message-----
From: Andi Kleen [mailto:[email protected]]
Sent: Wednesday, June 11, 2008 8:08 AM
To: Pavel Machek
Cc: Kasindorf, Barry; [email protected]
Subject: Re: [PATCH 1/3] AMD Family10h+ IBS support for oProfile driver:
Setup routines
Pavel Machek <[email protected]> writes:
> Hi!
>
>> This patchset supports the new profiling hardware available in the
latest AMD CPUs in the oProfile driver.
>
>> @@ -419,9 +454,15 @@ int __init op_nmi_init(struct
oprofile_operations *ops)
>> break;
>> case 0x10:
>> model = &op_athlon_spec;
>> - cpu_type = "x86-64/family10";
>> + cpu_type = "x86-64/family10h";
>> + break;
>
> Is this string visible to userspace?
Yes it is.
-Andi
On Wed 2008-06-11 09:29:29, Kasindorf, Barry wrote:
> Yes,
> This is visible to userspace. The corresponding changes to the usermode
> code have been sent to the oProfile list. The confusion is that it is
> not AMD Family 10 but it is Family 16(10h). This can be confusing so we
> changed it.
...so we require users to upgrade userspace tools at the same moment
they upgrade kernel, and they can't go to the old kernel. Bad.
(Please mention userspace change in changelog).
I don't think potential for confusion is that big... and perhaps you
can do s/10/10h/ in userspace tools?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
This is a thorny problem.
A mistake was made in the name almost 2 years ago. The longer we leave
it wrong the harder it will be to change. To use IBS, the major feature
added by this patch, you HAVE to upgrade the user tools anyway. If there
was a way to have the driver send both I would have done that. So far it
is a very narrow window of time that would have a problem.
If I get too much pushback I can leave it, but the code that sends
Family10 has not been present in the kernel for very long at this point,
and it should be fixed. When the patch that added Family10 went in it
had the same issue, before it would have reported Family8 and worked
with the older userspace tools, adding this would get a cpu type unset
error.
-Barry
-----Original Message-----
From: Pavel Machek [mailto:[email protected]]
Sent: Wednesday, June 11, 2008 2:01 PM
To: Kasindorf, Barry
Cc: Andi Kleen; [email protected]
Subject: Re: [PATCH 1/3] AMD Family10h+ IBS support for oProfile driver:
Setup routines
On Wed 2008-06-11 09:29:29, Kasindorf, Barry wrote:
> Yes,
> This is visible to userspace. The corresponding changes to the
usermode
> code have been sent to the oProfile list. The confusion is that it is
> not AMD Family 10 but it is Family 16(10h). This can be confusing so
we
> changed it.
...so we require users to upgrade userspace tools at the same moment
they upgrade kernel, and they can't go to the old kernel. Bad.
(Please mention userspace change in changelog).
I don't think potential for confusion is that big... and perhaps you
can do s/10/10h/ in userspace tools?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures)
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Thu, 3 Jul 2008 11:20:52 +0200 Ingo Molnar <[email protected]> wrote:
> 1) please rebase your patches ontop of -mm, which carries the oprofile
> multiplexing cleanups and enhancements already which heavily interact
> with your patchset.
I dropped those. Because it's obvious that significant changes are
to be made. So I guess working against linux-next is appropriate for
this change.
* Barry Kasindorf <[email protected]> wrote:
> This patchset supports the new profiling hardware available in the
> latest AMD CPUs in the oProfile driver.
Looks interesting! There are some patch logistics issues:
1) please rebase your patches ontop of -mm, which carries the oprofile
multiplexing cleanups and enhancements already which heavily interact
with your patchset.
2) -mm is also based on linux-next which has a few other changes in
drivers/oprofile as well: i did a test-merge of your patches and 3/3
interacted heavily with API changes in linux-next. Part of the problem
was that you mixed whitespace changes into your patch which randomly
iteracted with other stuff. Cleanups are nice and welcome, but try to
keep them in a separate patch. (multiple patches if the situation
requires it)
3) please Cc: the x86 maintainers too because these patches change quite
a few things in the x86 architecture code.
also:
> case 0x10:
> model = &op_athlon_spec;
> - cpu_type = "x86-64/family10";
> + cpu_type = "x86-64/family10h";
> + break;
please just keep the ABI string at "x86-64/family10" - we can live with
this small quirk forever. The mess to remove that quirk is much larger
than the benefit it brings (which is small to non-existent).
[ you really dont want such a small issue block your patches.]
the next one is OK:
> + case 0x11:
> + model = &op_athlon_spec;
> + cpu_type = "x86-64/family11h";
as it's really 0x11.
Ingo
* Andrew Morton <[email protected]> wrote:
> On Thu, 3 Jul 2008 11:20:52 +0200 Ingo Molnar <[email protected]> wrote:
>
> > 1) please rebase your patches ontop of -mm, which carries the
> > oprofile multiplexing cleanups and enhancements already which
> > heavily interact with your patchset.
>
> I dropped those. Because it's obvious that significant changes are to
> be made. So I guess working against linux-next is appropriate for
> this change.
hm, okay. All the pending oprofile enhancements are nice in principle
and there's like 3-4 oprofile topics that we should really start
reviewing, integrating and testing:
- the gist of the perfmon2 changes which make oprofile work across
context-switches. Oprofile should have done this from the get go.
- the oprofile syscall enhancements from perfmon2 for lightning-fast
profiling feedback.
- oprofile multiplexing: makes hw counter constraints largely invisible.
Should have been done years ago.
- new hw support like this IBS patch but it would also be nice to see
BTS/PEBS support perhaps.
- [ personally i'd love to see /debug/oprofile/prof.txt that would do
user-space-less parsing of oprofile data and would in essence
replace readprofile for all practical purposes. (while still being
as simple and self-contained as readprofile) Full in-kernel
generation of human-readable text output. This would be the thing
that would bring oprofile a lot closer to the average kernel
developer IMO. (Also, various knobs under /debug/oprofile/* to
configure oprofile details on the fly, without any userspace
dependencies.) ]
- [ oprofile .config driven in-kernel self-tests. Right now we notice it
quite late when it breaks. If we do /debug/oprofile/ that would be a
great step towards that. ]
tip/x86/oprofile (which already has some smaller changes) could host
them, if there's interest. It would be renamed to tip/oprofile or so,
kept separate and exported separately for import into -mm and/or
linux-next - not mixed into x86 and other -tip bits.
v2.6.28 stuff at the earliest, obviously.
Ingo
I have re-worked the IBS patchset and will break it into smaller pieces
and re-submit it RSN.
I will rebase it against -mm or Linux-next whichever is best. I need
Robert's AMD Family10h EPCI patches for this to build so that has to
already be there in whichever tree I use.
This patchset does interact with Jason's multiplexing changes but we can
work that out.
-Barry
-----Original Message-----
From: Ingo Molnar [mailto:[email protected]]
Sent: Thursday, July 03, 2008 6:01 AM
To: Andrew Morton
Cc: Kasindorf, Barry; [email protected]; Richter, Robert; the
arch/x86 maintainers; Stephane Eranian; Yeh, Jason; Arjan van de Ven;
Markus Metzger
Subject: Re: [PATCH 1/3] AMD Family10h+ IBS support for oProfile
driver:Setup routines
* Andrew Morton <[email protected]> wrote:
> On Thu, 3 Jul 2008 11:20:52 +0200 Ingo Molnar <[email protected]> wrote:
>
> > 1) please rebase your patches ontop of -mm, which carries the
> > oprofile multiplexing cleanups and enhancements already which
> > heavily interact with your patchset.
>
> I dropped those. Because it's obvious that significant changes are to
> be made. So I guess working against linux-next is appropriate for
> this change.
hm, okay. All the pending oprofile enhancements are nice in principle
and there's like 3-4 oprofile topics that we should really start
reviewing, integrating and testing:
- the gist of the perfmon2 changes which make oprofile work across
context-switches. Oprofile should have done this from the get go.
- the oprofile syscall enhancements from perfmon2 for lightning-fast
profiling feedback.
- oprofile multiplexing: makes hw counter constraints largely invisible.
Should have been done years ago.
- new hw support like this IBS patch but it would also be nice to see
BTS/PEBS support perhaps.
- [ personally i'd love to see /debug/oprofile/prof.txt that would do
user-space-less parsing of oprofile data and would in essence
replace readprofile for all practical purposes. (while still being
as simple and self-contained as readprofile) Full in-kernel
generation of human-readable text output. This would be the thing
that would bring oprofile a lot closer to the average kernel
developer IMO. (Also, various knobs under /debug/oprofile/* to
configure oprofile details on the fly, without any userspace
dependencies.) ]
- [ oprofile .config driven in-kernel self-tests. Right now we notice it
quite late when it breaks. If we do /debug/oprofile/ that would be a
great step towards that. ]
tip/x86/oprofile (which already has some smaller changes) could host
them, if there's interest. It would be renamed to tip/oprofile or so,
kept separate and exported separately for import into -mm and/or
linux-next - not mixed into x86 and other -tip bits.
v2.6.28 stuff at the earliest, obviously.
Ingo