Subject: [PATCH 0/23] updates for oprofile

These patches are updates for oprofile, mainly for x86. Most patches
for x86 replace many macro definitions by function calls and implement
internal 64 bit usage for MSR access. The code is now better readable
and maintainable. Future changes can be made easier.

Also, struct op_x86_model_spec has been extended to better describe
the x86 model. This patch series introduces common functions that can
be used by all x86 models. It is a first step to merge similar model
specific code and to get a common code base for all models.

Another change is the rework of arch_perfmon init code. There was
model specific code in nmi_int.c. This code is moved to
op_model_ppro.c by using the init function pointer in struct
op_x86_model_spec.

There are no functional changes in this patch set, it only contains
cleanups and reworks.

The following changes since commit 0886751c5d8b19fcee2e65d34ae21c9111e652a9:
Robert Richter (1):
Merge commit 'v2.6.30' into oprofile/master

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git master

-Robert

-----------

Robert Richter (23):
Revert "oprofile: discover counters for op ppro too"
x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init
x86/oprofile: minor style changes in struct op_x86_model_spec
oprofile: remove irq_flags in struct op_entry
oprofile: remove obselete include headers
x86/oprofile: remove #ifdefs in ibs functions
x86/oprofile: simplify AMD cpu init code
x86/oprofile: move common macros to op_x86_model.h
x86/oprofile: remove MSR macros for AMD cpus
x86/oprofile: remove MSR macros for ppro cpus
x86/oprofile: remove MSR macros for p4 cpus
x86/oprofile: fix and cleanup CTRL_SET_* macros
x86/oprofile: remove unused macros for AMD virtualization profiling
x86/oprofile: pass the model to setup_ctrs() functions
x86/oprofile: replace macros to calculate control register
x86/oprofile: replace CTR_OVERFLOWED macros
x86/oprofile: replace CTRL_SET_*ACTIVE macros
x86/oprofile: replace CTR*_IS_RESERVED macros
x86/oprofile: use 64 bit wrmsr functions
x86/oprofile: use 64 bit values to save MSR states
x86/oprofile: remove some local variables in MSR save/restore functions
x86/oprofile: use 64 bit values in IBS functions
x86/oprofile: introduce oprofile_add_data64()

arch/x86/oprofile/nmi_int.c | 98 +++++++-------
arch/x86/oprofile/op_model_amd.c | 267 ++++++++++++++++---------------------
arch/x86/oprofile/op_model_p4.c | 60 ++++-----
arch/x86/oprofile/op_model_ppro.c | 95 ++++++--------
arch/x86/oprofile/op_x86_model.h | 47 ++++---
drivers/oprofile/cpu_buffer.c | 16 ++-
include/linux/oprofile.h | 2 +-
7 files changed, 281 insertions(+), 304 deletions(-)



Subject: [PATCH 02/23] x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init

The function arch_perfmon_init() in nmi_int.c is model specific. This
patch moves it to op_model_ppro.c by using the init function pointer
in struct op_x86_model_spec.

Cc: Andi Kleen <[email protected]>
Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 21 +++++++++------------
arch/x86/oprofile/op_model_ppro.c | 9 ++++++++-
arch/x86/oprofile/op_x86_model.h | 2 --
3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3b285e6..dd85153 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -427,7 +427,7 @@ static int __init ppro_init(char **cpu_type)
*cpu_type = "i386/core_2";
break;
case 26:
- arch_perfmon_setup_counters();
+ model = &op_arch_perfmon_spec;
*cpu_type = "i386/core_i7";
break;
case 28:
@@ -442,16 +442,6 @@ static int __init ppro_init(char **cpu_type)
return 1;
}

-static int __init arch_perfmon_init(char **cpu_type)
-{
- if (!cpu_has_arch_perfmon)
- return 0;
- *cpu_type = "i386/arch_perfmon";
- model = &op_arch_perfmon_spec;
- arch_perfmon_setup_counters();
- return 1;
-}
-
/* in order to get sysfs right */
static int using_nmi;

@@ -509,8 +499,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
break;
}

- if (!cpu_type && !arch_perfmon_init(&cpu_type))
+ if (cpu_type)
+ break;
+
+ if (!cpu_has_arch_perfmon)
return -ENODEV;
+
+ /* use arch perfmon as fallback */
+ cpu_type = "i386/arch_perfmon";
+ model = &op_arch_perfmon_spec;
break;

default:
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2a12399..ae58119 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -233,7 +233,7 @@ struct op_x86_model_spec const op_ppro_spec = {
* the specific CPU.
*/

-void arch_perfmon_setup_counters(void)
+static void arch_perfmon_setup_counters(void)
{
union cpuid10_eax eax;

@@ -253,7 +253,14 @@ void arch_perfmon_setup_counters(void)
op_arch_perfmon_spec.num_controls = num_counters;
}

+static int arch_perfmon_init(struct oprofile_operations *ignore)
+{
+ arch_perfmon_setup_counters();
+ return 0;
+}
+
struct op_x86_model_spec op_arch_perfmon_spec = {
+ .init = &arch_perfmon_init,
/* num_counters/num_controls filled in at runtime */
.fill_in_addresses = &ppro_fill_in_addresses,
/* user space does the cpuid check for available events */
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 2317149..ed27783 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -51,6 +51,4 @@ extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_amd_spec;
extern struct op_x86_model_spec op_arch_perfmon_spec;

-extern void arch_perfmon_setup_counters(void);
-
#endif /* OP_X86_MODEL_H */
--
1.6.3.1

Subject: [PATCH 03/23] x86/oprofile: minor style changes in struct op_x86_model_spec

Some vertical alignments. Variables are now located in the beginning
of the struct.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_x86_model.h | 22 +++++++++++-----------
1 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index ed27783..bd8157d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -32,17 +32,17 @@ struct pt_regs;
* various x86 CPU models' perfctr support.
*/
struct op_x86_model_spec {
- int (*init)(struct oprofile_operations *ops);
- void (*exit)(void);
- unsigned int num_counters;
- unsigned int num_controls;
- void (*fill_in_addresses)(struct op_msrs * const msrs);
- void (*setup_ctrs)(struct op_msrs const * const msrs);
- int (*check_ctrs)(struct pt_regs * const regs,
- struct op_msrs const * const msrs);
- void (*start)(struct op_msrs const * const msrs);
- void (*stop)(struct op_msrs const * const msrs);
- void (*shutdown)(struct op_msrs const * const msrs);
+ unsigned int num_counters;
+ unsigned int num_controls;
+ int (*init)(struct oprofile_operations *ops);
+ void (*exit)(void);
+ void (*fill_in_addresses)(struct op_msrs * const msrs);
+ void (*setup_ctrs)(struct op_msrs const * const msrs);
+ int (*check_ctrs)(struct pt_regs * const regs,
+ struct op_msrs const * const msrs);
+ void (*start)(struct op_msrs const * const msrs);
+ void (*stop)(struct op_msrs const * const msrs);
+ void (*shutdown)(struct op_msrs const * const msrs);
};

extern struct op_x86_model_spec const op_ppro_spec;
--
1.6.3.1

Subject: [PATCH 01/23] Revert "oprofile: discover counters for op ppro too"

This reverts commit 59512900baab03c5629f2ff5efad1d5d4e682ece.

arch_perfmon_setup_counters() is actually never called for ppro, so
there is no code that changes the numbers in op_ppro_spec. The patch
as it is has no effect.

Cc: Andi Kleen <[email protected]>
Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_ppro.c | 8 +++-----
arch/x86/oprofile/op_x86_model.h | 2 +-
2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fb..2a12399 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -213,9 +213,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
}


-struct op_x86_model_spec op_ppro_spec = {
- .num_counters = 2, /* can be overriden */
- .num_controls = 2, /* dito */
+struct op_x86_model_spec const op_ppro_spec = {
+ .num_counters = 2,
+ .num_controls = 2,
.fill_in_addresses = &ppro_fill_in_addresses,
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
@@ -251,8 +251,6 @@ void arch_perfmon_setup_counters(void)

op_arch_perfmon_spec.num_counters = num_counters;
op_arch_perfmon_spec.num_controls = num_counters;
- op_ppro_spec.num_counters = num_counters;
- op_ppro_spec.num_controls = num_counters;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 825e790..2317149 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -45,7 +45,7 @@ struct op_x86_model_spec {
void (*shutdown)(struct op_msrs const * const msrs);
};

-extern struct op_x86_model_spec op_ppro_spec;
+extern struct op_x86_model_spec const op_ppro_spec;
extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_amd_spec;
--
1.6.3.1

Subject: [PATCH 05/23] oprofile: remove obselete include headers

This became obsolete with this commit:

6dad828 oprofile: port to the new ring_buffer

Signed-off-by: Robert Richter <[email protected]>
---
drivers/oprofile/cpu_buffer.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 242257b..50640cc 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -21,7 +21,6 @@

#include <linux/sched.h>
#include <linux/oprofile.h>
-#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "event_buffer.h"
--
1.6.3.1

Subject: [PATCH 17/23] x86/oprofile: replace CTRL_SET_*ACTIVE macros

The patch replaces all CTRL_SET_*ACTIVE macros. 64 bit MSR functions
and 64 bit counter values are used now. The code uses bit masks from
<asm/intel_arch_perfmon.h>.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 16 ++++++++--------
arch/x86/oprofile/op_model_ppro.c | 16 ++++++++--------
arch/x86/oprofile/op_x86_model.h | 2 --
3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b5d678f..4ac9d28 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -262,13 +262,13 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,

static void op_amd_start(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
if (reset_value[i]) {
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_SET_ACTIVE(low);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
}

@@ -277,7 +277,7 @@ static void op_amd_start(struct op_msrs const * const msrs)

static void op_amd_stop(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;

/*
@@ -287,9 +287,9 @@ static void op_amd_stop(struct op_msrs const * const msrs)
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
if (!reset_value[i])
continue;
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_SET_INACTIVE(low);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}

op_amd_stop_ibs();
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 82db396..566b43f 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -145,16 +145,16 @@ static int ppro_check_ctrs(struct pt_regs * const regs,

static void ppro_start(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;

if (!reset_value)
return;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_SET_ACTIVE(low);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
}
}
@@ -162,7 +162,7 @@ static void ppro_start(struct op_msrs const * const msrs)

static void ppro_stop(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;

if (!reset_value)
@@ -170,9 +170,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_SET_INACTIVE(low);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
}

diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 3220d4c..1c45777 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -17,8 +17,6 @@

#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
-#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE)
-#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE)

struct op_saved_msr {
unsigned int high;
--
1.6.3.1

Subject: [PATCH 08/23] x86/oprofile: move common macros to op_x86_model.h

There are duplicate macro implementations in model specific code. This
patch moves all common macros to op_x86_model.h.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 8 --------
arch/x86/oprofile/op_model_p4.c | 2 --
arch/x86/oprofile/op_model_ppro.c | 8 --------
arch/x86/oprofile/op_x86_model.h | 9 +++++++++
4 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b54c088..4b9254a 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -26,22 +26,14 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4

-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR_LO(x) (x &= (1<<21))
#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 819b131..420c15e 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -366,8 +366,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index ae58119..a922a1a 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -26,19 +26,11 @@
static int num_counters = 2;
static int counter_width = 32;

-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))

-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT(val, e) (val |= e)

static u64 *reset_value;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index bd8157d..c80ec7d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -11,6 +11,15 @@
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H

+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+
struct op_saved_msr {
unsigned int high;
unsigned int low;
--
1.6.3.1

Subject: [PATCH 18/23] x86/oprofile: replace CTR*_IS_RESERVED macros

The patch replaces all CTR*_IS_RESERVED macros.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 10 +++++-----
arch/x86/oprofile/op_model_p4.c | 10 +++++-----
arch/x86/oprofile/op_model_ppro.c | 10 +++++-----
arch/x86/oprofile/op_x86_model.h | 3 ---
4 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 4ac9d28..c5c5eec 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -90,7 +90,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,

/* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->controls[i].addr))
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
@@ -99,14 +99,14 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,

/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->counters[i].addr))
continue;
wrmsr(msrs->counters[i].addr, -1, -1);
}

/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+ if (counter_config[i].enabled && msrs->counters[i].addr) {
reset_value[i] = counter_config[i].count;
wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
rdmsrl(msrs->controls[i].addr, val);
@@ -300,11 +300,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
int i;

for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (CTR_IS_RESERVED(msrs, i))
+ if (msrs->counters[i].addr)
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
}
for (i = 0 ; i < NUM_CONTROLS ; ++i) {
- if (CTRL_IS_RESERVED(msrs, i))
+ if (msrs->controls[i].addr)
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
}
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac4ca28..9db0ca9 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -559,7 +559,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model,

/* clear the cccrs we will use */
for (i = 0 ; i < num_counters ; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->controls[i].addr))
continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_CLEAR(low);
@@ -569,14 +569,14 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model,

/* clear all escrs (including those outside our concern) */
for (i = num_counters; i < num_controls; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->controls[i].addr))
continue;
wrmsr(msrs->controls[i].addr, 0, 0);
}

/* setup all counters */
for (i = 0 ; i < num_counters ; ++i) {
- if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
+ if (counter_config[i].enabled && msrs->controls[i].addr) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address,
@@ -679,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
int i;

for (i = 0 ; i < num_counters ; ++i) {
- if (CTR_IS_RESERVED(msrs, i))
+ if (msrs->counters[i].addr)
release_perfctr_nmi(msrs->counters[i].addr);
}
/*
@@ -688,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
* This saves a few bits.
*/
for (i = num_counters ; i < num_controls ; ++i) {
- if (CTRL_IS_RESERVED(msrs, i))
+ if (msrs->controls[i].addr)
release_evntsel_nmi(msrs->controls[i].addr);
}
}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 566b43f..0a261a5 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -82,7 +82,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,

/* clear all counters */
for (i = 0 ; i < num_counters; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->controls[i].addr))
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
@@ -91,14 +91,14 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,

/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < num_counters; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->counters[i].addr))
continue;
wrmsrl(msrs->counters[i].addr, -1LL);
}

/* enable active counters */
for (i = 0; i < num_counters; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+ if (counter_config[i].enabled && msrs->counters[i].addr) {
reset_value[i] = counter_config[i].count;
wrmsrl(msrs->counters[i].addr, -reset_value[i]);
rdmsrl(msrs->controls[i].addr, val);
@@ -181,11 +181,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
int i;

for (i = 0 ; i < num_counters ; ++i) {
- if (CTR_IS_RESERVED(msrs, i))
+ if (msrs->counters[i].addr)
release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
}
for (i = 0 ; i < num_counters ; ++i) {
- if (CTRL_IS_RESERVED(msrs, i))
+ if (msrs->controls[i].addr)
release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
}
if (reset_value) {
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 1c45777..69f1eb4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -15,9 +15,6 @@
#include <asm/types.h>
#include <asm/intel_arch_perfmon.h>

-#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
-#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
-
struct op_saved_msr {
unsigned int high;
unsigned int low;
--
1.6.3.1

Subject: [PATCH 12/23] x86/oprofile: fix and cleanup CTRL_SET_* macros

This patch fixes missing braces around macro parameters. Macro
definitions from intel_arch_perfmon.h are used where possible.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_ppro.c | 1 -
arch/x86/oprofile/op_x86_model.h | 18 ++++++++++--------
2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 6c5d288..61ee8f6 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,6 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
-#include <asm/intel_arch_perfmon.h>

#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index c80ec7d..a207b1c 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -11,14 +11,16 @@
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H

-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+#include <asm/intel_arch_perfmon.h>
+
+#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
+#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
+#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE)
+#define CTRL_SET_ENABLE(val) ((val) |= ARCH_PERFMON_EVENTSEL_INT)
+#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE)
+#define CTRL_SET_KERN(val, k) ((val) |= ((k) ? ARCH_PERFMON_EVENTSEL_OS : 0))
+#define CTRL_SET_USR(val, u) ((val) |= ((u) ? ARCH_PERFMON_EVENTSEL_USR : 0))
+#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))

struct op_saved_msr {
unsigned int high;
--
1.6.3.1

Subject: [PATCH 04/23] oprofile: remove irq_flags in struct op_entry

This became obsolete with this commit:

304cc6a ring_buffer: remove unused flags parameter, fix

Signed-off-by: Robert Richter <[email protected]>
---
include/linux/oprofile.h | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 1d9518b..dbbe2db 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -171,7 +171,6 @@ struct op_sample;
struct op_entry {
struct ring_buffer_event *event;
struct op_sample *sample;
- unsigned long irq_flags;
unsigned long size;
unsigned long *data;
};
--
1.6.3.1

Subject: [PATCH 19/23] x86/oprofile: use 64 bit wrmsr functions

This patch replaces some wrmsr() functions with wrmsrl().

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 7 ++++---
arch/x86/oprofile/op_model_p4.c | 12 ++++++------
2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index c5c5eec..9bf9017 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -101,14 +101,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
for (i = 0; i < NUM_COUNTERS; ++i) {
if (unlikely(!msrs->counters[i].addr))
continue;
- wrmsr(msrs->counters[i].addr, -1, -1);
+ wrmsrl(msrs->counters[i].addr, -1LL);
}

/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
if (counter_config[i].enabled && msrs->counters[i].addr) {
reset_value[i] = counter_config[i].count;
- wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
+ wrmsrl(msrs->counters[i].addr,
+ -(s64)counter_config[i].count);
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
val |= op_x86_get_ctrl(model, &counter_config[i]);
@@ -251,7 +252,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
if (val & OP_CTR_OVERFLOW)
continue;
oprofile_add_sample(regs, i);
- wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
+ wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]);
}

op_amd_handle_ibs(regs, msrs);
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 9db0ca9..f01e53b 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -579,8 +579,8 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model,
if (counter_config[i].enabled && msrs->controls[i].addr) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
- wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address,
- -(u32)counter_config[i].count, -1);
+ wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
+ -(s64)counter_config[i].count);
} else {
reset_value[i] = 0;
}
@@ -624,12 +624,12 @@ static int p4_check_ctrs(struct pt_regs * const regs,
rdmsr(p4_counters[real].counter_address, ctr, high);
if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
oprofile_add_sample(regs, i);
- wrmsr(p4_counters[real].counter_address,
- -(u32)reset_value[i], -1);
+ wrmsrl(p4_counters[real].counter_address,
+ -(s64)reset_value[i]);
CCCR_CLEAR_OVF(low);
wrmsr(p4_counters[real].cccr_address, low, high);
- wrmsr(p4_counters[real].counter_address,
- -(u32)reset_value[i], -1);
+ wrmsrl(p4_counters[real].counter_address,
+ -(s64)reset_value[i]);
}
}

--
1.6.3.1

Subject: [PATCH 09/23] x86/oprofile: remove MSR macros for AMD cpus

The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and
maintain. This patch replaces them by rdmsr()/wrmsr() functions and
simplifies the code.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 29 ++++++++++++-----------------
1 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 4b9254a..c6181c2 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -26,12 +26,7 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4

-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
#define CTRL_CLEAR_LO(x) (x &= (1<<21))
#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
@@ -101,17 +96,17 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
for (i = 0 ; i < NUM_CONTROLS; ++i) {
if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_CLEAR_LO(low);
CTRL_CLEAR_HI(high);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
}

/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
if (unlikely(!CTR_IS_RESERVED(msrs, i)))
continue;
- CTR_WRITE(1, msrs, i);
+ wrmsr(msrs->counters[i].addr, -1, -1);
}

/* enable active counters */
@@ -119,9 +114,9 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;

- CTR_WRITE(counter_config[i].count, msrs, i);
+ wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);

- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_CLEAR_LO(low);
CTRL_CLEAR_HI(high);
CTRL_SET_ENABLE(low);
@@ -133,7 +128,7 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
CTRL_SET_HOST_ONLY(high, 0);
CTRL_SET_GUEST_ONLY(high, 0);

- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
} else {
reset_value[i] = 0;
}
@@ -267,10 +262,10 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
for (i = 0 ; i < NUM_COUNTERS; ++i) {
if (!reset_value[i])
continue;
- CTR_READ(low, high, msrs, i);
+ rdmsr(msrs->counters[i].addr, low, high);
if (CTR_OVERFLOWED(low)) {
oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], msrs, i);
+ wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
}
}

@@ -286,9 +281,9 @@ static void op_amd_start(struct op_msrs const * const msrs)
int i;
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
if (reset_value[i]) {
- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
}
}

@@ -307,9 +302,9 @@ static void op_amd_stop(struct op_msrs const * const msrs)
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
if (!reset_value[i])
continue;
- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
}

op_amd_stop_ibs();
--
1.6.3.1

Subject: [PATCH 11/23] x86/oprofile: remove MSR macros for p4 cpus

The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and
maintain. This patch replaces them by rdmsr()/wrmsr() functions and
simplifies the code.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_p4.c | 39 +++++++++++++++++++--------------------
1 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 420c15e..365d8a9 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -350,8 +350,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
-#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -361,13 +359,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

-#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
-#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))


@@ -513,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
if (ev->bindings[i].virt_counter & counter_bit) {

/* modify ESCR */
- ESCR_READ(escr, high, ev, i);
+ rdmsr(ev->bindings[i].escr_address, escr, high);
ESCR_CLEAR(escr);
if (stag == 0) {
ESCR_SET_USR_0(escr, counter_config[ctr].user);
@@ -524,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
}
ESCR_SET_EVENT_SELECT(escr, ev->event_select);
ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
- ESCR_WRITE(escr, high, ev, i);
+ wrmsr(ev->bindings[i].escr_address, escr, high);

/* modify CCCR */
- CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+ rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+ cccr, high);
CCCR_CLEAR(cccr);
CCCR_SET_REQUIRED_BITS(cccr);
CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
@@ -535,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
CCCR_SET_PMI_OVF_0(cccr);
else
CCCR_SET_PMI_OVF_1(cccr);
- CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+ wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+ cccr, high);
return;
}
}
@@ -582,7 +578,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
- CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address,
+ -(u32)counter_config[i].count, -1);
} else {
reset_value[i] = 0;
}
@@ -622,14 +619,16 @@ static int p4_check_ctrs(struct pt_regs * const regs,

real = VIRT_CTR(stag, i);

- CCCR_READ(low, high, real);
- CTR_READ(ctr, high, real);
+ rdmsr(p4_counters[real].cccr_address, low, high);
+ rdmsr(p4_counters[real].counter_address, ctr, high);
if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], real);
+ wrmsr(p4_counters[real].counter_address,
+ -(u32)reset_value[i], -1);
CCCR_CLEAR_OVF(low);
- CCCR_WRITE(low, high, real);
- CTR_WRITE(reset_value[i], real);
+ wrmsr(p4_counters[real].cccr_address, low, high);
+ wrmsr(p4_counters[real].counter_address,
+ -(u32)reset_value[i], -1);
}
}

@@ -651,9 +650,9 @@ static void p4_start(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CCCR_READ(low, high, VIRT_CTR(stag, i));
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_SET_ENABLE(low);
- CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
}

@@ -668,9 +667,9 @@ static void p4_stop(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CCCR_READ(low, high, VIRT_CTR(stag, i));
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_SET_DISABLE(low);
- CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
}

--
1.6.3.1

Subject: [PATCH 10/23] x86/oprofile: remove MSR macros for ppro cpus

The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and
maintain. This patch replaces them by rdmsr()/wrmsr() functions and
simplifies the code.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_ppro.c | 19 ++++++++-----------
1 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index a922a1a..6c5d288 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -27,9 +27,6 @@ static int num_counters = 2;
static int counter_width = 32;

#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-
-#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_CLEAR(x) (x &= (1<<21))
#define CTRL_SET_EVENT(val, e) (val |= e)

@@ -88,9 +85,9 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
for (i = 0 ; i < num_counters; ++i) {
if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_CLEAR(low);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
}

/* avoid a false detection of ctr overflows in NMI handler */
@@ -107,14 +104,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)

wrmsrl(msrs->counters[i].addr, -reset_value[i]);

- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_CLEAR(low);
CTRL_SET_ENABLE(low);
CTRL_SET_USR(low, counter_config[i].user);
CTRL_SET_KERN(low, counter_config[i].kernel);
CTRL_SET_UM(low, counter_config[i].unit_mask);
CTRL_SET_EVENT(low, counter_config[i].event);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
} else {
reset_value[i] = 0;
}
@@ -162,9 +159,9 @@ static void ppro_start(struct op_msrs const * const msrs)
return;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
}
}
}
@@ -180,9 +177,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CTRL_READ(low, high, msrs, i);
+ rdmsr(msrs->controls[i].addr, low, high);
CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
+ wrmsr(msrs->controls[i].addr, low, high);
}
}

--
1.6.3.1

Subject: [PATCH 20/23] x86/oprofile: use 64 bit values to save MSR states

This patch removes struct op_saved_msr and replaces it by an u64
variable. This makes code easier and it is possible to use 64 bit MSR
functions.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 28 ++++++++--------------------
arch/x86/oprofile/op_x86_model.h | 9 ++-------
2 files changed, 10 insertions(+), 27 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 388ee15..3b84b78 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -78,19 +78,13 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs)
unsigned int i;

for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr) {
- rdmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
- }
+ if (counters[i].addr)
+ rdmsrl(counters[i].addr, counters[i].saved);
}

for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr) {
- rdmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
- }
+ if (controls[i].addr)
+ rdmsrl(controls[i].addr, controls[i].saved);
}
}

@@ -204,19 +198,13 @@ static void nmi_restore_registers(struct op_msrs *msrs)
unsigned int i;

for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr) {
- wrmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
- }
+ if (controls[i].addr)
+ wrmsrl(controls[i].addr, controls[i].saved);
}

for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr) {
- wrmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
- }
+ if (counters[i].addr)
+ wrmsrl(counters[i].addr, counters[i].saved);
}
}

diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 69f1eb4..fda52b4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -15,14 +15,9 @@
#include <asm/types.h>
#include <asm/intel_arch_perfmon.h>

-struct op_saved_msr {
- unsigned int high;
- unsigned int low;
-};
-
struct op_msr {
- unsigned long addr;
- struct op_saved_msr saved;
+ unsigned long addr;
+ u64 saved;
};

struct op_msrs {
--
1.6.3.1

Subject: [PATCH 21/23] x86/oprofile: remove some local variables in MSR save/restore functions

The patch removes some local variables in these functions.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 12 ++++--------
1 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3b84b78..80b63d5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -71,18 +71,16 @@ static int profile_exceptions_notify(struct notifier_block *self,

static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
- unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
struct op_msr *counters = msrs->counters;
struct op_msr *controls = msrs->controls;
unsigned int i;

- for (i = 0; i < nr_ctrs; ++i) {
+ for (i = 0; i < model->num_counters; ++i) {
if (counters[i].addr)
rdmsrl(counters[i].addr, counters[i].saved);
}

- for (i = 0; i < nr_ctrls; ++i) {
+ for (i = 0; i < model->num_controls; ++i) {
if (controls[i].addr)
rdmsrl(controls[i].addr, controls[i].saved);
}
@@ -191,18 +189,16 @@ static int nmi_setup(void)

static void nmi_restore_registers(struct op_msrs *msrs)
{
- unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
struct op_msr *counters = msrs->counters;
struct op_msr *controls = msrs->controls;
unsigned int i;

- for (i = 0; i < nr_ctrls; ++i) {
+ for (i = 0; i < model->num_controls; ++i) {
if (controls[i].addr)
wrmsrl(controls[i].addr, controls[i].saved);
}

- for (i = 0; i < nr_ctrs; ++i) {
+ for (i = 0; i < model->num_counters; ++i) {
if (counters[i].addr)
wrmsrl(counters[i].addr, counters[i].saved);
}
--
1.6.3.1

Subject: [PATCH 15/23] x86/oprofile: replace macros to calculate control register

This patch introduces op_x86_get_ctrl() to calculate the value of the
performance control register. This is generic code usable for all
models. The event and reserved masks are model specific and stored in
struct op_x86_model_spec. 64 bit MSR functions are used now. The patch
removes many hard to read macros used for ctrl calculation.

The function op_x86_get_ctrl() is common code and the first step to
further merge performance counter implementations for x86 models.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 20 ++++++++++++++++++
arch/x86/oprofile/op_model_amd.c | 41 ++++++++++++++----------------------
arch/x86/oprofile/op_model_ppro.c | 29 +++++++++++--------------
arch/x86/oprofile/op_x86_model.h | 15 ++++++++++---
4 files changed, 60 insertions(+), 45 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index c31f87b..388ee15 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

+/* common functions */
+
+u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+ struct op_counter_config *counter_config)
+{
+ u64 val = 0;
+ u16 event = (u16)counter_config->event;
+
+ val |= ARCH_PERFMON_EVENTSEL_INT;
+ val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
+ val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
+ val |= (counter_config->unit_mask & 0xFF) << 8;
+ event &= model->event_mask ? model->event_mask : 0xFF;
+ val |= event & 0xFF;
+ val |= (event & 0x0F00) << 24;
+
+ return val;
+}
+
+
static int profile_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 86e0a01..2406ab8 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -25,12 +25,11 @@

#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
+#define OP_EVENT_MASK 0x0FFF
+
+#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))

#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-#define CTRL_CLEAR_LO(x) (x &= (1<<21))
-#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
-#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))

static unsigned long reset_value[NUM_COUNTERS];

@@ -84,21 +83,19 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
}
}

-
static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;

/* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) {
if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_CLEAR_LO(low);
- CTRL_CLEAR_HI(high);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ wrmsrl(msrs->controls[i].addr, val);
}

/* avoid a false detection of ctr overflows in NMI handler */
@@ -112,19 +109,11 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
for (i = 0; i < NUM_COUNTERS; ++i) {
if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;
-
wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
-
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_CLEAR_LO(low);
- CTRL_CLEAR_HI(high);
- CTRL_SET_ENABLE(low);
- CTRL_SET_USR(low, counter_config[i].user);
- CTRL_SET_KERN(low, counter_config[i].kernel);
- CTRL_SET_UM(low, counter_config[i].unit_mask);
- CTRL_SET_EVENT_LOW(low, counter_config[i].event);
- CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[i]);
+ wrmsrl(msrs->controls[i].addr, val);
} else {
reset_value[i] = 0;
}
@@ -486,14 +475,16 @@ static void op_amd_exit(void) {}
#endif /* CONFIG_OPROFILE_IBS */

struct op_x86_model_spec const op_amd_spec = {
- .init = op_amd_init,
- .exit = op_amd_exit,
.num_counters = NUM_COUNTERS,
.num_controls = NUM_CONTROLS,
+ .reserved = MSR_AMD_EVENTSEL_RESERVED,
+ .event_mask = OP_EVENT_MASK,
+ .init = op_amd_init,
+ .exit = op_amd_exit,
.fill_in_addresses = &op_amd_fill_in_addresses,
.setup_ctrs = &op_amd_setup_ctrs,
.check_ctrs = &op_amd_check_ctrs,
.start = &op_amd_start,
.stop = &op_amd_stop,
- .shutdown = &op_amd_shutdown
+ .shutdown = &op_amd_shutdown,
};
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 40b44ee..3092f99 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -10,6 +10,7 @@
* @author Philippe Elie
* @author Graydon Hoare
* @author Andi Kleen
+ * @author Robert Richter <[email protected]>
*/

#include <linux/oprofile.h>
@@ -26,8 +27,8 @@ static int num_counters = 2;
static int counter_width = 32;

#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_EVENT(val, e) (val |= e)
+
+#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))

static u64 *reset_value;

@@ -54,7 +55,7 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;

if (!reset_value) {
@@ -85,9 +86,9 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
for (i = 0 ; i < num_counters; ++i) {
if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_CLEAR(low);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ wrmsrl(msrs->controls[i].addr, val);
}

/* avoid a false detection of ctr overflows in NMI handler */
@@ -101,17 +102,11 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
for (i = 0; i < num_counters; ++i) {
if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;
-
wrmsrl(msrs->counters[i].addr, -reset_value[i]);
-
- rdmsr(msrs->controls[i].addr, low, high);
- CTRL_CLEAR(low);
- CTRL_SET_ENABLE(low);
- CTRL_SET_USR(low, counter_config[i].user);
- CTRL_SET_KERN(low, counter_config[i].kernel);
- CTRL_SET_UM(low, counter_config[i].unit_mask);
- CTRL_SET_EVENT(low, counter_config[i].event);
- wrmsr(msrs->controls[i].addr, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[i]);
+ wrmsrl(msrs->controls[i].addr, val);
} else {
reset_value[i] = 0;
}
@@ -205,6 +200,7 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
struct op_x86_model_spec const op_ppro_spec = {
.num_counters = 2,
.num_controls = 2,
+ .reserved = MSR_PPRO_EVENTSEL_RESERVED,
.fill_in_addresses = &ppro_fill_in_addresses,
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
@@ -249,6 +245,7 @@ static int arch_perfmon_init(struct oprofile_operations *ignore)
}

struct op_x86_model_spec op_arch_perfmon_spec = {
+ .reserved = MSR_PPRO_EVENTSEL_RESERVED,
.init = &arch_perfmon_init,
/* num_counters/num_controls filled in at runtime */
.fill_in_addresses = &ppro_fill_in_addresses,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 6161c7f..3220d4c 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -6,21 +6,19 @@
* @remark Read the file COPYING
*
* @author Graydon Hoare
+ * @author Robert Richter <[email protected]>
*/

#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H

+#include <asm/types.h>
#include <asm/intel_arch_perfmon.h>

#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE)
-#define CTRL_SET_ENABLE(val) ((val) |= ARCH_PERFMON_EVENTSEL_INT)
#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE)
-#define CTRL_SET_KERN(val, k) ((val) |= ((k) ? ARCH_PERFMON_EVENTSEL_OS : 0))
-#define CTRL_SET_USR(val, u) ((val) |= ((u) ? ARCH_PERFMON_EVENTSEL_USR : 0))
-#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))

struct op_saved_msr {
unsigned int high;
@@ -39,12 +37,16 @@ struct op_msrs {

struct pt_regs;

+struct oprofile_operations;
+
/* The model vtable abstracts the differences between
* various x86 CPU models' perfctr support.
*/
struct op_x86_model_spec {
unsigned int num_counters;
unsigned int num_controls;
+ u64 reserved;
+ u16 event_mask;
int (*init)(struct oprofile_operations *ops);
void (*exit)(void);
void (*fill_in_addresses)(struct op_msrs * const msrs);
@@ -57,6 +59,11 @@ struct op_x86_model_spec {
void (*shutdown)(struct op_msrs const * const msrs);
};

+struct op_counter_config;
+
+extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+ struct op_counter_config *counter_config);
+
extern struct op_x86_model_spec const op_ppro_spec;
extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec;
--
1.6.3.1

Subject: [PATCH 22/23] x86/oprofile: use 64 bit values in IBS functions

The IBS code internally uses 32 bit values (a low and a high value) to
represent a 64 bit value. This patch changes this and now 64 bit
values are used instead. 64 bit MSR functions can be used now.

No functional changes.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 131 ++++++++++++++++++--------------------
1 files changed, 61 insertions(+), 70 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 9bf9017..6493ef7 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -35,16 +35,18 @@ static unsigned long reset_value[NUM_COUNTERS];
#ifdef CONFIG_OPROFILE_IBS

/* IbsFetchCtl bits/masks */
-#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */
-#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */
-#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */
+#define IBS_FETCH_RAND_EN (1ULL<<57)
+#define IBS_FETCH_VAL (1ULL<<49)
+#define IBS_FETCH_ENABLE (1ULL<<48)
+#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL

/*IbsOpCtl bits */
-#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
-#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */
+#define IBS_OP_CNT_CTL (1ULL<<19)
+#define IBS_OP_VAL (1ULL<<18)
+#define IBS_OP_ENABLE (1ULL<<17)

-#define IBS_FETCH_SIZE 6
-#define IBS_OP_SIZE 12
+#define IBS_FETCH_SIZE 6
+#define IBS_OP_SIZE 12

static int has_ibs; /* AMD Family10h and later */

@@ -126,66 +128,63 @@ static inline int
op_amd_handle_ibs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
- u32 low, high;
- u64 msr;
+ u64 val, ctl;
struct op_entry entry;

if (!has_ibs)
return 1;

if (ibs_config.fetch_enabled) {
- rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
- if (high & IBS_FETCH_HIGH_VALID_BIT) {
- rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
- oprofile_write_reserve(&entry, regs, msr,
+ rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+ if (ctl & IBS_FETCH_VAL) {
+ rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
+ oprofile_write_reserve(&entry, regs, val,
IBS_FETCH_CODE, IBS_FETCH_SIZE);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- oprofile_add_data(&entry, low);
- oprofile_add_data(&entry, high);
- rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data(&entry, (u32)ctl);
+ oprofile_add_data(&entry, (u32)(ctl >> 32));
+ rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
oprofile_write_commit(&entry);

/* reenable the IRQ */
- high &= ~IBS_FETCH_HIGH_VALID_BIT;
- high |= IBS_FETCH_HIGH_ENABLE;
- low &= IBS_FETCH_LOW_MAX_CNT_MASK;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK);
+ ctl |= IBS_FETCH_ENABLE;
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
}
}

if (ibs_config.op_enabled) {
- rdmsr(MSR_AMD64_IBSOPCTL, low, high);
- if (low & IBS_OP_LOW_VALID_BIT) {
- rdmsrl(MSR_AMD64_IBSOPRIP, msr);
- oprofile_write_reserve(&entry, regs, msr,
+ rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
+ if (ctl & IBS_OP_VAL) {
+ rdmsrl(MSR_AMD64_IBSOPRIP, val);
+ oprofile_write_reserve(&entry, regs, val,
IBS_OP_CODE, IBS_OP_SIZE);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSOPDATA, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
+ rdmsrl(MSR_AMD64_IBSOPDATA, val);
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
+ rdmsrl(MSR_AMD64_IBSOPDATA2, val);
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
+ rdmsrl(MSR_AMD64_IBSOPDATA3, val);
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
+ rdmsrl(MSR_AMD64_IBSDCLINAD, val);
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
+ rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
+ oprofile_add_data(&entry, (u32)val);
+ oprofile_add_data(&entry, (u32)(val >> 32));
oprofile_write_commit(&entry);

/* reenable the IRQ */
- high = 0;
- low &= ~IBS_OP_LOW_VALID_BIT;
- low |= IBS_OP_LOW_ENABLE;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
+ ctl |= IBS_OP_ENABLE;
+ wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
}
}

@@ -194,39 +193,31 @@ op_amd_handle_ibs(struct pt_regs * const regs,

static inline void op_amd_start_ibs(void)
{
- unsigned int low, high;
+ u64 val;
if (has_ibs && ibs_config.fetch_enabled) {
- low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
- high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
- + IBS_FETCH_HIGH_ENABLE;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+ val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
+ val |= IBS_FETCH_ENABLE;
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
}

if (has_ibs && ibs_config.op_enabled) {
- low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
- + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
- + IBS_OP_LOW_ENABLE;
- high = 0;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
+ val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+ val |= IBS_OP_ENABLE;
+ wrmsrl(MSR_AMD64_IBSOPCTL, val);
}
}

static void op_amd_stop_ibs(void)
{
- unsigned int low, high;
- if (has_ibs && ibs_config.fetch_enabled) {
+ if (has_ibs && ibs_config.fetch_enabled)
/* clear max count and enable */
- low = 0;
- high = 0;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
- }
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

- if (has_ibs && ibs_config.op_enabled) {
+ if (has_ibs && ibs_config.op_enabled)
/* clear max count and enable */
- low = 0;
- high = 0;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
- }
+ wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}

#else
--
1.6.3.1

Subject: [PATCH 06/23] x86/oprofile: remove #ifdefs in ibs functions

IBS code is moved to separate functions. This allows the removal
of #ifdefs in functions.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 80 ++++++++++++++++++++++----------------
1 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 8fdf06e..b54c088 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -220,6 +220,50 @@ op_amd_handle_ibs(struct pt_regs * const regs,
return 1;
}

+static inline void op_amd_start_ibs(void)
+{
+ unsigned int low, high;
+ if (has_ibs && ibs_config.fetch_enabled) {
+ low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+ high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
+ + IBS_FETCH_HIGH_ENABLE;
+ wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ }
+
+ if (has_ibs && ibs_config.op_enabled) {
+ low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
+ + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
+ + IBS_OP_LOW_ENABLE;
+ high = 0;
+ wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ }
+}
+
+static void op_amd_stop_ibs(void)
+{
+ unsigned int low, high;
+ if (has_ibs && ibs_config.fetch_enabled) {
+ /* clear max count and enable */
+ low = 0;
+ high = 0;
+ wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ }
+
+ if (has_ibs && ibs_config.op_enabled) {
+ /* clear max count and enable */
+ low = 0;
+ high = 0;
+ wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ }
+}
+
+#else
+
+static inline int op_amd_handle_ibs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs) { }
+static inline void op_amd_start_ibs(void) { }
+static inline void op_amd_stop_ibs(void) { }
+
#endif

static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -238,9 +282,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
}
}

-#ifdef CONFIG_OPROFILE_IBS
op_amd_handle_ibs(regs, msrs);
-#endif

/* See op_model_ppro.c */
return 1;
@@ -258,25 +300,9 @@ static void op_amd_start(struct op_msrs const * const msrs)
}
}

-#ifdef CONFIG_OPROFILE_IBS
- if (has_ibs && ibs_config.fetch_enabled) {
- low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
- high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
- + IBS_FETCH_HIGH_ENABLE;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
- }
-
- if (has_ibs && ibs_config.op_enabled) {
- low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
- + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
- + IBS_OP_LOW_ENABLE;
- high = 0;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
- }
-#endif
+ op_amd_start_ibs();
}

-
static void op_amd_stop(struct op_msrs const * const msrs)
{
unsigned int low, high;
@@ -294,21 +320,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
CTRL_WRITE(low, high, msrs, i);
}

-#ifdef CONFIG_OPROFILE_IBS
- if (has_ibs && ibs_config.fetch_enabled) {
- /* clear max count and enable */
- low = 0;
- high = 0;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
- }
-
- if (has_ibs && ibs_config.op_enabled) {
- /* clear max count and enable */
- low = 0;
- high = 0;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
- }
-#endif
+ op_amd_stop_ibs();
}

static void op_amd_shutdown(struct op_msrs const * const msrs)
--
1.6.3.1

Subject: [PATCH 23/23] x86/oprofile: introduce oprofile_add_data64()

The IBS implemention writes 64 bit register values to the cpu buffer
by writing two 32 values using oprofile_add_data(). This patch
introduces oprofile_add_data64() to write a single 64 bit value to the
buffer.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 27 +++++++++------------------
drivers/oprofile/cpu_buffer.c | 15 +++++++++++++++
include/linux/oprofile.h | 1 +
3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 6493ef7..cc93046 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -140,13 +140,10 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
oprofile_write_reserve(&entry, regs, val,
IBS_FETCH_CODE, IBS_FETCH_SIZE);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
- oprofile_add_data(&entry, (u32)ctl);
- oprofile_add_data(&entry, (u32)(ctl >> 32));
+ oprofile_add_data64(&entry, val);
+ oprofile_add_data64(&entry, ctl);
rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
oprofile_write_commit(&entry);

/* reenable the IRQ */
@@ -162,23 +159,17 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsrl(MSR_AMD64_IBSOPRIP, val);
oprofile_write_reserve(&entry, regs, val,
IBS_OP_CODE, IBS_OP_SIZE);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA, val);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA2, val);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA3, val);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCLINAD, val);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
- oprofile_add_data(&entry, (u32)val);
- oprofile_add_data(&entry, (u32)(val >> 32));
+ oprofile_add_data64(&entry, val);
oprofile_write_commit(&entry);

/* reenable the IRQ */
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 50640cc..a7aae24 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -406,6 +406,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val)
return op_cpu_buffer_add_data(entry, val);
}

+int oprofile_add_data64(struct op_entry *entry, u64 val)
+{
+ if (!entry->event)
+ return 0;
+ if (op_cpu_buffer_get_size(entry) < 2)
+ /*
+ * the function returns 0 to indicate a too small
+ * buffer, even if there is some space left
+ */
+ return 0;
+ if (!op_cpu_buffer_add_data(entry, (u32)val))
+ return 0;
+ return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
+}
+
int oprofile_write_commit(struct op_entry *entry)
{
if (!entry->event)
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index dbbe2db..d68d2ed 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -179,6 +179,7 @@ void oprofile_write_reserve(struct op_entry *entry,
struct pt_regs * const regs,
unsigned long pc, int code, int size);
int oprofile_add_data(struct op_entry *entry, unsigned long val);
+int oprofile_add_data64(struct op_entry *entry, u64 val);
int oprofile_write_commit(struct op_entry *entry);

#endif /* OPROFILE_H */
--
1.6.3.1

Subject: [PATCH 13/23] x86/oprofile: remove unused macros for AMD virtualization profiling

The use of the macros has no effect. The oprofilefs has to be extended
first to support these features.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 5 -----
1 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index c6181c2..aaa7ffa 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -31,8 +31,6 @@
#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
-#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
-#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))

static unsigned long reset_value[NUM_COUNTERS];

@@ -125,9 +123,6 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
CTRL_SET_UM(low, counter_config[i].unit_mask);
CTRL_SET_EVENT_LOW(low, counter_config[i].event);
CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
- CTRL_SET_HOST_ONLY(high, 0);
- CTRL_SET_GUEST_ONLY(high, 0);
-
wrmsr(msrs->controls[i].addr, low, high);
} else {
reset_value[i] = 0;
--
1.6.3.1

Subject: [PATCH 07/23] x86/oprofile: simplify AMD cpu init code

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index dd85153..ae0ab03 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,27 +460,26 @@ int __init op_nmi_init(struct oprofile_operations *ops)
/* Needs to be at least an Athlon (or hammer in 32bit mode) */

switch (family) {
- default:
- return -ENODEV;
case 6:
- model = &op_amd_spec;
cpu_type = "i386/athlon";
break;
case 0xf:
- model = &op_amd_spec;
- /* Actually it could be i386/hammer too, but give
- user space an consistent name. */
+ /*
+ * Actually it could be i386/hammer too, but
+ * give user space an consistent name.
+ */
cpu_type = "x86-64/hammer";
break;
case 0x10:
- model = &op_amd_spec;
cpu_type = "x86-64/family10";
break;
case 0x11:
- model = &op_amd_spec;
cpu_type = "x86-64/family11h";
break;
+ default:
+ return -ENODEV;
}
+ model = &op_amd_spec;
break;

case X86_VENDOR_INTEL:
--
1.6.3.1

Subject: [PATCH 16/23] x86/oprofile: replace CTR_OVERFLOWED macros

The patch replaces all CTR_OVERFLOWED macros. 64 bit MSR functions and
64 bit counter values are used now. Thus, it will be easier to later
extend the models to use more than 32 bit width counters.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 16 ++++++++--------
arch/x86/oprofile/op_model_p4.c | 6 +++---
arch/x86/oprofile/op_model_ppro.c | 10 ++++------
3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 2406ab8..b5d678f 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -26,11 +26,10 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
#define OP_EVENT_MASK 0x0FFF
+#define OP_CTR_OVERFLOW (1ULL<<31)

#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))

-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
static unsigned long reset_value[NUM_COUNTERS];

#ifdef CONFIG_OPROFILE_IBS
@@ -241,17 +240,18 @@ static inline void op_amd_stop_ibs(void) { }
static int op_amd_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;

for (i = 0 ; i < NUM_COUNTERS; ++i) {
if (!reset_value[i])
continue;
- rdmsr(msrs->counters[i].addr, low, high);
- if (CTR_OVERFLOWED(low)) {
- oprofile_add_sample(regs, i);
- wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
- }
+ rdmsrl(msrs->counters[i].addr, val);
+ /* bit is clear if overflowed: */
+ if (val & OP_CTR_OVERFLOW)
+ continue;
+ oprofile_add_sample(regs, i);
+ wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
}

op_amd_handle_ibs(regs, msrs);
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 05ba028..ac4ca28 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -32,6 +32,8 @@
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

+#define OP_CTR_OVERFLOW (1ULL<<31)
+
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

@@ -362,8 +364,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

-#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
-

/* this assigns a "stagger" to the current CPU, which is used throughout
the code in this module as an extra array offset, to select the "even"
@@ -622,7 +622,7 @@ static int p4_check_ctrs(struct pt_regs * const regs,

rdmsr(p4_counters[real].cccr_address, low, high);
rdmsr(p4_counters[real].counter_address, ctr, high);
- if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+ if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
oprofile_add_sample(regs, i);
wrmsr(p4_counters[real].counter_address,
-(u32)reset_value[i], -1);
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3092f99..82db396 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -26,8 +26,6 @@
static int num_counters = 2;
static int counter_width = 32;

-#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-
#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))

static u64 *reset_value;
@@ -124,10 +122,10 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
if (!reset_value[i])
continue;
rdmsrl(msrs->counters[i].addr, val);
- if (CTR_OVERFLOWED(val)) {
- oprofile_add_sample(regs, i);
- wrmsrl(msrs->counters[i].addr, -reset_value[i]);
- }
+ if (val & (1ULL << (counter_width - 1)))
+ continue;
+ oprofile_add_sample(regs, i);
+ wrmsrl(msrs->counters[i].addr, -reset_value[i]);
}

/* Only P6 based Pentium M need to re-unmask the apic vector but it
--
1.6.3.1

Subject: [PATCH 14/23] x86/oprofile: pass the model to setup_ctrs() functions

In follow-on patches the setup_ctrs() functions will need data that
describes the model. This patch extends the function argument list to
pass a pointer of the model to these function.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 2 +-
arch/x86/oprofile/op_model_amd.c | 3 ++-
arch/x86/oprofile/op_model_p4.c | 3 ++-
arch/x86/oprofile/op_model_ppro.c | 3 ++-
arch/x86/oprofile/op_x86_model.h | 3 ++-
5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index ae0ab03..c31f87b 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -125,7 +125,7 @@ static void nmi_cpu_setup(void *dummy)
int cpu = smp_processor_id();
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
spin_lock(&oprofilefs_lock);
- model->setup_ctrs(msrs);
+ model->setup_ctrs(model, msrs);
spin_unlock(&oprofilefs_lock);
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, APIC_DM_NMI);
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index aaa7ffa..86e0a01 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -85,7 +85,8 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
}


-static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
+static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
{
unsigned int low, high;
int i;
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 365d8a9..05ba028 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -542,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
}


-static void p4_setup_ctrs(struct op_msrs const * const msrs)
+static void p4_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
{
unsigned int i;
unsigned int low, high;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 61ee8f6..40b44ee 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -51,7 +51,8 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
}


-static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
{
unsigned int low, high;
int i;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index a207b1c..6161c7f 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -48,7 +48,8 @@ struct op_x86_model_spec {
int (*init)(struct oprofile_operations *ops);
void (*exit)(void);
void (*fill_in_addresses)(struct op_msrs * const msrs);
- void (*setup_ctrs)(struct op_msrs const * const msrs);
+ void (*setup_ctrs)(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs);
int (*check_ctrs)(struct pt_regs * const regs,
struct op_msrs const * const msrs);
void (*start)(struct op_msrs const * const msrs);
--
1.6.3.1

Subject: [PATCH] x86/oprofile: fix initialization of arch_perfmon for core_i7

Commit:

e419294 x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init

introduced a bug in the initialization of core_i7 leading to the
incorrect model setup to &op_ppro_spec. This patch fixes this.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/nmi_int.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)

You may pull the patch from the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git master

The branch also contains the merge with tip/perfcounters-for-linus to
solve merge conflicts with Linus' tree.

-Robert

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 7826dfc..28ee490 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -406,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
+ struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */

if (force_arch_perfmon && cpu_has_arch_perfmon)
return 0;
@@ -432,7 +433,7 @@ static int __init ppro_init(char **cpu_type)
*cpu_type = "i386/core_2";
break;
case 26:
- model = &op_arch_perfmon_spec;
+ spec = &op_arch_perfmon_spec;
*cpu_type = "i386/core_i7";
break;
case 28:
@@ -443,7 +444,7 @@ static int __init ppro_init(char **cpu_type)
return 0;
}

- model = &op_ppro_spec;
+ model = spec;
return 1;
}

--
1.6.3.1

2009-06-22 07:49:31

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 19/23] x86/oprofile: use 64 bit wrmsr functions

Robert Richter <[email protected]> writes:
> val |= op_x86_get_ctrl(model, &counter_config[i]);
> @@ -251,7 +252,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
> if (val & OP_CTR_OVERFLOW)
> continue;
> oprofile_add_sample(regs, i);
> - wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
> + wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]);

These are not equivalent on 64bit because reset_value is a long,
so the new code doesn't 32->64 extend anymore. You would
need a (s32) cast, not a (s64)

I don't think if it causes real problems, but it looks suspicious.

That's all over in multiple files.

-Andi

--
[email protected] -- Speaking for myself only.

2009-06-22 08:08:05

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 19/23] x86/oprofile: use 64 bit wrmsr functions

Andi Kleen <[email protected]> writes:
>
> I don't think if it causes real problems, but it looks suspicious.

s/think/know/

-Andi

--
[email protected] -- Speaking for myself only.