Subject: [PATCH 0/9] oprofile: implement support of samples with variable data size

This patch series implements support of samples with variable data
size. Current implementation has a fixed sample size of 2 unsigned
longs. This makes it hard to implement use cases with a different
sample size (AMD IBS or power pc cell) since buffer locking is
required or samples my become incomplete. The internal cpu buffer
usage has been changed now and allows the attachment of data with
varable length to samples.

There are patches that also change the current ibs implementation. In
the end the ibs implentation could be removed from the general
oprofile code to model specific code. An API is available that
provides generic functions to use variable sample sizes.

The patches are also available here:

git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git oprofile-for-tip

-Robert



Subject: [PATCH 4/9] oprofile: add op_cpu_buffer_add_data()

This function can be used to attach data to a sample. It returns the
remaining free buffer size that has been reserved with
op_cpu_buffer_write_reserve().

Signed-off-by: Robert Richter <[email protected]>
---
drivers/oprofile/cpu_buffer.c | 2 +-
drivers/oprofile/cpu_buffer.h | 12 ++++++++++++
2 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index e859d23..1b65907 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -258,7 +258,7 @@ op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
sample->event = flags;

if (size)
- sample->data[0] = (unsigned long)task;
+ op_cpu_buffer_add_data(&entry, (unsigned long)task);

op_cpu_buffer_write_commit(&entry);

diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index e634dcf..e178dd2 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -78,6 +78,18 @@ int op_cpu_buffer_write_commit(struct op_entry *entry);
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
unsigned long op_cpu_buffer_entries(int cpu);

+/* returns the remaining free size of data in the entry */
+static inline
+int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
+{
+ if (!entry->size)
+ return 0;
+ *entry->data = val;
+ entry->size--;
+ entry->data++;
+ return entry->size;
+}
+
/* extra data flags */
#define KERNEL_CTX_SWITCH (1UL << 0)
#define IS_KERNEL (1UL << 1)
--
1.6.0.1

Subject: [PATCH 2/9] oprofile: modify op_cpu_buffer_read_entry()

This implements the support of samples with attached data.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/oprofile/buffer_sync.c | 23 +++++++++++++----------
drivers/oprofile/cpu_buffer.c | 14 +++++++++++---
drivers/oprofile/cpu_buffer.h | 2 +-
3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 21fd249..908202a 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -329,9 +329,10 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
int i, count;
unsigned long cookie = 0;
off_t offset;
+ struct op_entry entry;
struct op_sample *sample;

- sample = op_cpu_buffer_read_entry(cpu);
+ sample = op_cpu_buffer_read_entry(&entry, cpu);
if (!sample)
return;
pc = sample->eip;
@@ -370,7 +371,7 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/

for (i = 0; i < count; i++) {
- sample = op_cpu_buffer_read_entry(cpu);
+ sample = op_cpu_buffer_read_entry(&entry, cpu);
if (!sample)
return;
add_event_entry(sample->eip);
@@ -528,6 +529,8 @@ void sync_buffer(int cpu)
sync_buffer_state state = sb_buffer_start;
unsigned int i;
unsigned long available;
+ struct op_entry entry;
+ struct op_sample *sample;

mutex_lock(&buffer_mutex);

@@ -537,19 +540,19 @@ void sync_buffer(int cpu)
available = op_cpu_buffer_entries(cpu);

for (i = 0; i < available; ++i) {
- struct op_sample *s = op_cpu_buffer_read_entry(cpu);
- if (!s)
+ sample = op_cpu_buffer_read_entry(&entry, cpu);
+ if (!sample)
break;

- if (is_code(s->eip)) {
- switch (s->event) {
+ if (is_code(sample->eip)) {
+ switch (sample->event) {
case 0:
case CPU_IS_KERNEL:
/* kernel/userspace switch */
- in_kernel = s->event;
+ in_kernel = sample->event;
if (state == sb_buffer_start)
state = sb_sample_start;
- add_kernel_ctx_switch(s->event);
+ add_kernel_ctx_switch(sample->event);
break;
case CPU_TRACE_BEGIN:
state = sb_bt_start;
@@ -566,7 +569,7 @@ void sync_buffer(int cpu)
default:
/* userspace context switch */
oldmm = mm;
- new = (struct task_struct *)s->event;
+ new = (struct task_struct *)sample->event;
release_mm(oldmm);
mm = take_tasks_mm(new);
if (mm != oldmm)
@@ -581,7 +584,7 @@ void sync_buffer(int cpu)
/* ignore sample */
continue;

- if (add_sample(mm, s, in_kernel))
+ if (add_sample(mm, sample, in_kernel))
continue;

/* ignore backtraces if failed to add a sample */
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 934ff15..400f7fc 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -182,20 +182,28 @@ int op_cpu_buffer_write_commit(struct op_entry *entry)
entry->irq_flags);
}

-struct op_sample *op_cpu_buffer_read_entry(int cpu)
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
struct ring_buffer_event *e;
e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
if (e)
- return ring_buffer_event_data(e);
+ goto event;
if (ring_buffer_swap_cpu(op_ring_buffer_read,
op_ring_buffer_write,
cpu))
return NULL;
e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
if (e)
- return ring_buffer_event_data(e);
+ goto event;
return NULL;
+
+event:
+ entry->event = e;
+ entry->sample = ring_buffer_event_data(e);
+ entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
+ / sizeof(entry->sample->data[0]);
+ entry->data = entry->sample->data;
+ return entry->sample;
}

unsigned long op_cpu_buffer_entries(int cpu)
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index 2d4bfde..d7c0545 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -75,7 +75,7 @@ static inline void op_cpu_buffer_reset(int cpu)
struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size);
int op_cpu_buffer_write_commit(struct op_entry *entry);
-struct op_sample *op_cpu_buffer_read_entry(int cpu);
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
unsigned long op_cpu_buffer_entries(int cpu);

/* transient events for the CPU buffer -> event buffer */
--
1.6.0.1

Subject: [PATCH 1/9] oprofile: add op_cpu_buffer_write_reserve()

This function prepares the cpu buffer to write a sample.

Struct op_entry is used during operations on the ring buffer while
struct op_sample contains the data that is stored in the ring
buffer. Struct entry can be uninitialized. The function reserves a
data array that is specified by size. Use op_cpu_buffer_write_commit()
after preparing the sample. In case of errors a null pointer is
returned, otherwise the pointer to the sample.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/oprofile/cpu_buffer.c | 43 ++++++++++++++++++++++++++++------------
drivers/oprofile/cpu_buffer.h | 9 ++++++-
2 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index ac79f66..934ff15 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -1,11 +1,12 @@
/**
* @file cpu_buffer.c
*
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <[email protected]>
* @author Barry Kasindorf <[email protected]>
+ * @author Robert Richter <[email protected]>
*
* Each CPU has a local buffer that stores PC value/event
* pairs. We also log context switches when we notice them.
@@ -143,20 +144,36 @@ void end_cpu_work(void)
flush_scheduled_work();
}

-int op_cpu_buffer_write_entry(struct op_entry *entry)
+/*
+ * This function prepares the cpu buffer to write a sample.
+ *
+ * Struct op_entry is used during operations on the ring buffer while
+ * struct op_sample contains the data that is stored in the ring
+ * buffer. Struct entry can be uninitialized. The function reserves a
+ * data array that is specified by size. Use
+ * op_cpu_buffer_write_commit() after preparing the sample. In case of
+ * errors a null pointer is returned, otherwise the pointer to the
+ * sample.
+ *
+ */
+struct op_sample
+*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
- entry->event = ring_buffer_lock_reserve(op_ring_buffer_write,
- sizeof(struct op_sample),
- &entry->irq_flags);
+ entry->event = ring_buffer_lock_reserve
+ (op_ring_buffer_write, sizeof(struct op_sample) +
+ size * sizeof(entry->sample->data[0]), &entry->irq_flags);
if (entry->event)
entry->sample = ring_buffer_event_data(entry->event);
else
entry->sample = NULL;

if (!entry->sample)
- return -ENOMEM;
+ return NULL;

- return 0;
+ entry->size = size;
+ entry->data = entry->sample->data;
+
+ return entry->sample;
}

int op_cpu_buffer_write_commit(struct op_entry *entry)
@@ -192,14 +209,14 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
unsigned long pc, unsigned long event)
{
struct op_entry entry;
- int ret;
+ struct op_sample *sample;

- ret = op_cpu_buffer_write_entry(&entry);
- if (ret)
- return ret;
+ sample = op_cpu_buffer_write_reserve(&entry, 0);
+ if (!sample)
+ return -ENOMEM;

- entry.sample->eip = pc;
- entry.sample->event = event;
+ sample->eip = pc;
+ sample->event = event;

return op_cpu_buffer_write_commit(&entry);
}
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index 65b763a..2d4bfde 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -1,10 +1,11 @@
/**
* @file cpu_buffer.h
*
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <[email protected]>
+ * @author Robert Richter <[email protected]>
*/

#ifndef OPROFILE_CPU_BUFFER_H
@@ -31,12 +32,15 @@ void end_cpu_work(void);
struct op_sample {
unsigned long eip;
unsigned long event;
+ unsigned long data[0];
};

struct op_entry {
struct ring_buffer_event *event;
struct op_sample *sample;
unsigned long irq_flags;
+ unsigned long size;
+ unsigned long *data;
};

struct oprofile_cpu_buffer {
@@ -68,7 +72,8 @@ static inline void op_cpu_buffer_reset(int cpu)
cpu_buf->last_task = NULL;
}

-int op_cpu_buffer_write_entry(struct op_entry *entry);
+struct op_sample
+*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size);
int op_cpu_buffer_write_commit(struct op_entry *entry);
struct op_sample *op_cpu_buffer_read_entry(int cpu);
unsigned long op_cpu_buffer_entries(int cpu);
--
1.6.0.1

Subject: [PATCH 3/9] oprofile: rework implementation of cpu buffer events

Special events such as task or context switches are marked with an
escape code in the cpu buffer followed by an event code or a task
identifier. There is one escape code per event. To make escape
sequences also available for data samples the internal cpu buffer
format must be changed. The current implementation does not allow the
extension of event codes since this would lead to collisions with the
task identifiers. To avoid this, this patch introduces an event mask
that allows the storage of multiple events with one escape code. Now,
task identifiers are stored in the data section of the sample. The
implementation also allows the usage of custom data in a sample. As a
side effect the new code is much more readable and easier to
understand.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 8 +-
drivers/oprofile/buffer_sync.c | 42 ++++++------
drivers/oprofile/cpu_buffer.c | 139 ++++++++++++++++++++-----------------
drivers/oprofile/cpu_buffer.h | 12 ++--
4 files changed, 106 insertions(+), 95 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 423a954..f101724 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -2,7 +2,7 @@
* @file op_model_amd.c
* athlon / K7 / K8 / Family 10h model-specific MSR operations
*
- * @remark Copyright 2002-2008 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
@@ -10,7 +10,7 @@
* @author Graydon Hoare
* @author Robert Richter <[email protected]>
* @author Barry Kasindorf
-*/
+ */

#include <linux/oprofile.h>
#include <linux/device.h>
@@ -62,8 +62,8 @@ static unsigned long reset_value[NUM_COUNTERS];

/* Codes used in cpu_buffer.c */
/* This produces duplicate code, need to be fixed */
-#define IBS_FETCH_BEGIN 3
-#define IBS_OP_BEGIN 4
+#define IBS_FETCH_BEGIN (1UL << 4)
+#define IBS_OP_BEGIN (1UL << 5)

/*
* The function interface needs to be fixed, something like add
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 908202a..d969bb1 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -1,11 +1,12 @@
/**
* @file buffer_sync.c
*
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <[email protected]>
* @author Barry Kasindorf
+ * @author Robert Richter <[email protected]>
*
* This is the core of the buffer management. Each
* CPU buffer is processed and entered into the
@@ -529,6 +530,7 @@ void sync_buffer(int cpu)
sync_buffer_state state = sb_buffer_start;
unsigned int i;
unsigned long available;
+ unsigned long flags;
struct op_entry entry;
struct op_sample *sample;

@@ -545,38 +547,34 @@ void sync_buffer(int cpu)
break;

if (is_code(sample->eip)) {
- switch (sample->event) {
- case 0:
- case CPU_IS_KERNEL:
+ flags = sample->event;
+ if (flags & TRACE_BEGIN) {
+ state = sb_bt_start;
+ add_trace_begin();
+ }
+ if (flags & KERNEL_CTX_SWITCH) {
/* kernel/userspace switch */
- in_kernel = sample->event;
+ in_kernel = flags & IS_KERNEL;
if (state == sb_buffer_start)
state = sb_sample_start;
- add_kernel_ctx_switch(sample->event);
- break;
- case CPU_TRACE_BEGIN:
- state = sb_bt_start;
- add_trace_begin();
- break;
-#ifdef CONFIG_OPROFILE_IBS
- case IBS_FETCH_BEGIN:
- add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
- break;
- case IBS_OP_BEGIN:
- add_ibs_begin(cpu, IBS_OP_CODE, mm);
- break;
-#endif
- default:
+ add_kernel_ctx_switch(flags & IS_KERNEL);
+ }
+ if (flags & USER_CTX_SWITCH) {
/* userspace context switch */
oldmm = mm;
- new = (struct task_struct *)sample->event;
+ new = (struct task_struct *)sample->data[0];
release_mm(oldmm);
mm = take_tasks_mm(new);
if (mm != oldmm)
cookie = get_exec_dcookie(mm);
add_user_ctx_switch(new, cookie);
- break;
}
+#ifdef CONFIG_OPROFILE_IBS
+ if (flags & IBS_FETCH_BEGIN)
+ add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
+ if (flags & IBS_OP_BEGIN)
+ add_ibs_begin(cpu, IBS_OP_CODE, mm);
+#endif
continue;
}

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 400f7fc..e859d23 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -212,6 +212,59 @@ unsigned long op_cpu_buffer_entries(int cpu)
+ ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
}

+static int
+op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
+ int is_kernel, struct task_struct *task)
+{
+ struct op_entry entry;
+ struct op_sample *sample;
+ unsigned long flags;
+ int size;
+
+ flags = 0;
+
+ if (backtrace)
+ flags |= TRACE_BEGIN;
+
+ /* notice a switch from user->kernel or vice versa */
+ is_kernel = !!is_kernel;
+ if (cpu_buf->last_is_kernel != is_kernel) {
+ cpu_buf->last_is_kernel = is_kernel;
+ flags |= KERNEL_CTX_SWITCH;
+ if (is_kernel)
+ flags |= IS_KERNEL;
+ }
+
+ /* notice a task switch */
+ if (cpu_buf->last_task != task) {
+ cpu_buf->last_task = task;
+ flags |= USER_CTX_SWITCH;
+ }
+
+ if (!flags)
+ /* nothing to do */
+ return 0;
+
+ if (flags & USER_CTX_SWITCH)
+ size = 1;
+ else
+ size = 0;
+
+ sample = op_cpu_buffer_write_reserve(&entry, size);
+ if (!sample)
+ return -ENOMEM;
+
+ sample->eip = ESCAPE_CODE;
+ sample->event = flags;
+
+ if (size)
+ sample->data[0] = (unsigned long)task;
+
+ op_cpu_buffer_write_commit(&entry);
+
+ return 0;
+}
+
static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
unsigned long pc, unsigned long event)
@@ -229,26 +282,18 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
return op_cpu_buffer_write_commit(&entry);
}

-static inline int
-add_code(struct oprofile_cpu_buffer *buffer, unsigned long value)
-{
- return op_add_sample(buffer, ESCAPE_CODE, value);
-}
-
-/* This must be safe from any context. It's safe writing here
- * because of the head/tail separation of the writer and reader
- * of the CPU buffer.
+/*
+ * This must be safe from any context.
*
* is_kernel is needed because on some architectures you cannot
* tell if you are in kernel or user space simply by looking at
* pc. We tag this in the buffer by generating kernel enter/exit
* events whenever is_kernel changes
*/
-static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
- int is_kernel, unsigned long event)
+static int
+log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
+ unsigned long backtrace, int is_kernel, unsigned long event)
{
- struct task_struct *task;
-
cpu_buf->sample_received++;

if (pc == ESCAPE_CODE) {
@@ -256,23 +301,8 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
return 0;
}

- is_kernel = !!is_kernel;
-
- task = current;
-
- /* notice a switch from user->kernel or vice versa */
- if (cpu_buf->last_is_kernel != is_kernel) {
- cpu_buf->last_is_kernel = is_kernel;
- if (add_code(cpu_buf, is_kernel))
- goto fail;
- }
-
- /* notice a task switch */
- if (cpu_buf->last_task != task) {
- cpu_buf->last_task = task;
- if (add_code(cpu_buf, (unsigned long)task))
- goto fail;
- }
+ if (op_add_code(cpu_buf, backtrace, is_kernel, current))
+ goto fail;

if (op_add_sample(cpu_buf, pc, event))
goto fail;
@@ -286,7 +316,6 @@ fail:

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
- add_code(cpu_buf, CPU_TRACE_BEGIN);
cpu_buf->tracing = 1;
}

@@ -300,21 +329,21 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
unsigned long event, int is_kernel)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
-
- if (!oprofile_backtrace_depth) {
- log_sample(cpu_buf, pc, is_kernel, event);
- return;
- }
-
- oprofile_begin_trace(cpu_buf);
+ unsigned long backtrace = oprofile_backtrace_depth;

/*
* if log_sample() fail we can't backtrace since we lost the
* source of this event
*/
- if (log_sample(cpu_buf, pc, is_kernel, event))
- oprofile_ops.backtrace(regs, oprofile_backtrace_depth);
+ if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
+ /* failed */
+ return;

+ if (!backtrace)
+ return;
+
+ oprofile_begin_trace(cpu_buf);
+ oprofile_ops.backtrace(regs, backtrace);
oprofile_end_trace(cpu_buf);
}

@@ -339,29 +368,14 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs,
{
int is_kernel = !user_mode(regs);
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
- struct task_struct *task;
int fail = 0;

cpu_buf->sample_received++;

- /* notice a switch from user->kernel or vice versa */
- if (cpu_buf->last_is_kernel != is_kernel) {
- if (add_code(cpu_buf, is_kernel))
- goto fail;
- cpu_buf->last_is_kernel = is_kernel;
- }
+ /* backtraces disabled for ibs */
+ fail = fail || op_add_code(cpu_buf, 0, is_kernel, current);

- /* notice a task switch */
- if (!is_kernel) {
- task = current;
- if (cpu_buf->last_task != task) {
- if (add_code(cpu_buf, (unsigned long)task))
- goto fail;
- cpu_buf->last_task = task;
- }
- }
-
- fail = fail || add_code(cpu_buf, ibs_code);
+ fail = fail || op_add_sample(cpu_buf, ESCAPE_CODE, ibs_code);
fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);
@@ -372,11 +386,8 @@ void oprofile_add_ibs_sample(struct pt_regs * const regs,
fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
}

- if (!fail)
- return;
-
-fail:
- cpu_buf->sample_lost_overflow++;
+ if (fail)
+ cpu_buf->sample_lost_overflow++;
}

#endif
@@ -384,7 +395,7 @@ fail:
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
- log_sample(cpu_buf, pc, is_kernel, event);
+ log_sample(cpu_buf, pc, 0, is_kernel, event);
}

void oprofile_add_trace(unsigned long pc)
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index d7c0545..e634dcf 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -78,10 +78,12 @@ int op_cpu_buffer_write_commit(struct op_entry *entry);
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
unsigned long op_cpu_buffer_entries(int cpu);

-/* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
-#define IBS_FETCH_BEGIN 3
-#define IBS_OP_BEGIN 4
+/* extra data flags */
+#define KERNEL_CTX_SWITCH (1UL << 0)
+#define IS_KERNEL (1UL << 1)
+#define TRACE_BEGIN (1UL << 2)
+#define USER_CTX_SWITCH (1UL << 3)
+#define IBS_FETCH_BEGIN (1UL << 4)
+#define IBS_OP_BEGIN (1UL << 5)

#endif /* OPROFILE_CPU_BUFFER_H */
--
1.6.0.1

Subject: [PATCH 7/9] ring_buffer: fix ring_buffer_event_length()

Function ring_buffer_event_length() provides an interface to detect
the length of data stored in an entry. However, the length contains
offsets depending on the internal usage. This makes it unusable. This
patch fixes this and now ring_buffer_event_length() returns the
alligned length that has been used in ring_buffer_lock_reserve().

Cc: Steven Rostedt <[email protected]>
Signed-off-by: Robert Richter <[email protected]>
---
kernel/trace/ring_buffer.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 30d57dd..d42b882 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -117,7 +117,13 @@ rb_event_length(struct ring_buffer_event *event)
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
- return rb_event_length(event);
+ unsigned length = rb_event_length(event);
+ if (event->type != RINGBUF_TYPE_DATA)
+ return length;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
+ length -= sizeof(event->array[0]);
+ return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

--
1.6.0.1

Subject: [PATCH 8/9] oprofile: remove #ifdef CONFIG_OPROFILE_IBS in non-ibs code

The ifdefs can be removed since the code is no longer ibs specific and
can be used for other purposes as well. IBS specific code is only in
op_model_amd.c.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/oprofile/buffer_sync.c | 6 ------
drivers/oprofile/cpu_buffer.c | 4 ----
2 files changed, 0 insertions(+), 10 deletions(-)

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index d692fdc..ac014cb 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -316,8 +316,6 @@ static void add_trace_begin(void)
add_event_entry(TRACE_BEGIN_CODE);
}

-#ifdef CONFIG_OPROFILE_IBS
-
static void add_data(struct op_entry *entry, struct mm_struct *mm)
{
unsigned long code, pc, val;
@@ -355,8 +353,6 @@ static void add_data(struct op_entry *entry, struct mm_struct *mm)
add_event_entry(val);
}

-#endif
-
static inline void add_sample_entry(unsigned long offset, unsigned long event)
{
add_event_entry(offset);
@@ -544,10 +540,8 @@ void sync_buffer(int cpu)
cookie = get_exec_dcookie(mm);
add_user_ctx_switch(new, cookie);
}
-#ifdef CONFIG_OPROFILE_IBS
if (op_cpu_buffer_get_size(&entry))
add_data(&entry, mm);
-#endif
continue;
}

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index ddba9d0..b846af6 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -361,8 +361,6 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
__oprofile_add_ext_sample(pc, regs, event, is_kernel);
}

-#ifdef CONFIG_OPROFILE_IBS
-
/*
* Add samples with data to the ring buffer.
*
@@ -397,8 +395,6 @@ fail:
cpu_buf->sample_lost_overflow++;
}

-#endif
-
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
--
1.6.0.1

Subject: [PATCH 5/9] oprofile: add op_cpu_buffer_get_data()

This function provides access to attached data of a sample. It returns
the size of data including the current value. Also,
op_cpu_buffer_get_size() is available to check if there is data
attached.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/oprofile/buffer_sync.c | 6 ++++--
drivers/oprofile/cpu_buffer.h | 20 ++++++++++++++++++++
2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index d969bb1..f9031d3 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -524,6 +524,7 @@ void sync_buffer(int cpu)
{
struct mm_struct *mm = NULL;
struct mm_struct *oldmm;
+ unsigned long val;
struct task_struct *new;
unsigned long cookie = 0;
int in_kernel = 1;
@@ -559,10 +560,11 @@ void sync_buffer(int cpu)
state = sb_sample_start;
add_kernel_ctx_switch(flags & IS_KERNEL);
}
- if (flags & USER_CTX_SWITCH) {
+ if (flags & USER_CTX_SWITCH
+ && op_cpu_buffer_get_data(&entry, &val)) {
/* userspace context switch */
+ new = (struct task_struct *)val;
oldmm = mm;
- new = (struct task_struct *)sample->data[0];
release_mm(oldmm);
mm = take_tasks_mm(new);
if (mm != oldmm)
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index e178dd2..f343760 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -90,6 +90,26 @@ int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
return entry->size;
}

+/* returns the size of data in the entry */
+static inline
+int op_cpu_buffer_get_size(struct op_entry *entry)
+{
+ return entry->size;
+}
+
+/* returns 0 if empty or the size of data including the current value */
+static inline
+int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
+{
+ int size = entry->size;
+ if (!size)
+ return 0;
+ *val = *entry->data;
+ entry->size--;
+ entry->data++;
+ return size;
+}
+
/* extra data flags */
#define KERNEL_CTX_SWITCH (1UL << 0)
#define IS_KERNEL (1UL << 1)
--
1.6.0.1

Subject: [PATCH 6/9] oprofile: use new data sample format for ibs

The new ring buffer implementation allows the storage of samples with
different size. This patch implements the usage of the new sample
format to store ibs samples in the cpu buffer. Until now, writing to
the cpu buffer could lead to incomplete sampling sequences since IBS
samples were transfered in multiple samples. Due to a full buffer,
data could be lost at any time. This can't happen any more since the
complete data is reserved in advance and then stored in a single
sample.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 119 +++++++++++++-------------------------
drivers/oprofile/buffer_sync.c | 53 ++++-------------
drivers/oprofile/cpu_buffer.c | 39 +++++++-----
drivers/oprofile/cpu_buffer.h | 2 -
4 files changed, 76 insertions(+), 137 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f101724..cf310ae 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,7 @@

#include "op_x86_model.h"
#include "op_counter.h"
+#include "../../../drivers/oprofile/cpu_buffer.h"

#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
@@ -60,51 +61,16 @@ static unsigned long reset_value[NUM_COUNTERS];
#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */

-/* Codes used in cpu_buffer.c */
-/* This produces duplicate code, need to be fixed */
-#define IBS_FETCH_BEGIN (1UL << 4)
-#define IBS_OP_BEGIN (1UL << 5)
-
/*
* The function interface needs to be fixed, something like add
* data. Should then be added to linux/oprofile.h.
*/
-extern void
-oprofile_add_ibs_sample(struct pt_regs * const regs,
- unsigned int * const ibs_sample, int ibs_code);
-
-struct ibs_fetch_sample {
- /* MSRC001_1031 IBS Fetch Linear Address Register */
- unsigned int ibs_fetch_lin_addr_low;
- unsigned int ibs_fetch_lin_addr_high;
- /* MSRC001_1030 IBS Fetch Control Register */
- unsigned int ibs_fetch_ctl_low;
- unsigned int ibs_fetch_ctl_high;
- /* MSRC001_1032 IBS Fetch Physical Address Register */
- unsigned int ibs_fetch_phys_addr_low;
- unsigned int ibs_fetch_phys_addr_high;
-};
+extern
+void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
+ unsigned long pc, int code, int size);

-struct ibs_op_sample {
- /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
- unsigned int ibs_op_rip_low;
- unsigned int ibs_op_rip_high;
- /* MSRC001_1035 IBS Op Data Register */
- unsigned int ibs_op_data1_low;
- unsigned int ibs_op_data1_high;
- /* MSRC001_1036 IBS Op Data 2 Register */
- unsigned int ibs_op_data2_low;
- unsigned int ibs_op_data2_high;
- /* MSRC001_1037 IBS Op Data 3 Register */
- unsigned int ibs_op_data3_low;
- unsigned int ibs_op_data3_high;
- /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
- unsigned int ibs_dc_linear_low;
- unsigned int ibs_dc_linear_high;
- /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
- unsigned int ibs_dc_phys_low;
- unsigned int ibs_dc_phys_high;
-};
+#define IBS_FETCH_SIZE 6
+#define IBS_OP_SIZE 12

static int has_ibs; /* AMD Family10h and later */

@@ -197,9 +163,9 @@ static inline int
op_amd_handle_ibs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
- unsigned int low, high;
- struct ibs_fetch_sample ibs_fetch;
- struct ibs_op_sample ibs_op;
+ u32 low, high;
+ u64 msr;
+ struct op_entry entry;

if (!has_ibs)
return 1;
@@ -207,21 +173,19 @@ op_amd_handle_ibs(struct pt_regs * const regs,
if (ibs_config.fetch_enabled) {
rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
if (high & IBS_FETCH_HIGH_VALID_BIT) {
- ibs_fetch.ibs_fetch_ctl_high = high;
- ibs_fetch.ibs_fetch_ctl_low = low;
- rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
- ibs_fetch.ibs_fetch_lin_addr_high = high;
- ibs_fetch.ibs_fetch_lin_addr_low = low;
- rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
- ibs_fetch.ibs_fetch_phys_addr_high = high;
- ibs_fetch.ibs_fetch_phys_addr_low = low;
-
- oprofile_add_ibs_sample(regs,
- (unsigned int *)&ibs_fetch,
- IBS_FETCH_BEGIN);
+ rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
+ oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE,
+ IBS_FETCH_SIZE);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ op_cpu_buffer_add_data(&entry, low);
+ op_cpu_buffer_add_data(&entry, high);
+ rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ op_cpu_buffer_write_commit(&entry);

/* reenable the IRQ */
- rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
high &= ~IBS_FETCH_HIGH_VALID_BIT;
high |= IBS_FETCH_HIGH_ENABLE;
low &= IBS_FETCH_LOW_MAX_CNT_MASK;
@@ -232,30 +196,29 @@ op_amd_handle_ibs(struct pt_regs * const regs,
if (ibs_config.op_enabled) {
rdmsr(MSR_AMD64_IBSOPCTL, low, high);
if (low & IBS_OP_LOW_VALID_BIT) {
- rdmsr(MSR_AMD64_IBSOPRIP, low, high);
- ibs_op.ibs_op_rip_low = low;
- ibs_op.ibs_op_rip_high = high;
- rdmsr(MSR_AMD64_IBSOPDATA, low, high);
- ibs_op.ibs_op_data1_low = low;
- ibs_op.ibs_op_data1_high = high;
- rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
- ibs_op.ibs_op_data2_low = low;
- ibs_op.ibs_op_data2_high = high;
- rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
- ibs_op.ibs_op_data3_low = low;
- ibs_op.ibs_op_data3_high = high;
- rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
- ibs_op.ibs_dc_linear_low = low;
- ibs_op.ibs_dc_linear_high = high;
- rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
- ibs_op.ibs_dc_phys_low = low;
- ibs_op.ibs_dc_phys_high = high;
+ rdmsrl(MSR_AMD64_IBSOPRIP, msr);
+ oprofile_add_data(&entry, regs, msr, IBS_OP_CODE,
+ IBS_OP_SIZE);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ rdmsrl(MSR_AMD64_IBSOPDATA, msr);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
+ op_cpu_buffer_add_data(&entry, (u32)msr);
+ op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ op_cpu_buffer_write_commit(&entry);

/* reenable the IRQ */
- oprofile_add_ibs_sample(regs,
- (unsigned int *)&ibs_op,
- IBS_OP_BEGIN);
- rdmsr(MSR_AMD64_IBSOPCTL, low, high);
high = 0;
low &= ~IBS_OP_LOW_VALID_BIT;
low |= IBS_OP_LOW_ENABLE;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index f9031d3..d692fdc 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -318,29 +318,18 @@ static void add_trace_begin(void)

#ifdef CONFIG_OPROFILE_IBS

-#define IBS_FETCH_CODE_SIZE 2
-#define IBS_OP_CODE_SIZE 5
-
-/*
- * Add IBS fetch and op entries to event buffer
- */
-static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
+static void add_data(struct op_entry *entry, struct mm_struct *mm)
{
- unsigned long pc;
- int i, count;
- unsigned long cookie = 0;
+ unsigned long code, pc, val;
+ unsigned long cookie;
off_t offset;
- struct op_entry entry;
- struct op_sample *sample;

- sample = op_cpu_buffer_read_entry(&entry, cpu);
- if (!sample)
+ if (!op_cpu_buffer_get_data(entry, &code))
+ return;
+ if (!op_cpu_buffer_get_data(entry, &pc))
+ return;
+ if (!op_cpu_buffer_get_size(entry))
return;
- pc = sample->eip;
-
-#ifdef __LP64__
- pc += sample->event << 32;
-#endif

if (mm) {
cookie = lookup_dcookie(mm, pc, &offset);
@@ -362,24 +351,8 @@ static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
add_event_entry(code);
add_event_entry(offset); /* Offset from Dcookie */

- /* we send the Dcookie offset, but send the raw Linear Add also*/
- add_event_entry(sample->eip);
- add_event_entry(sample->event);
-
- if (code == IBS_FETCH_CODE)
- count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
- else
- count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
-
- for (i = 0; i < count; i++) {
- sample = op_cpu_buffer_read_entry(&entry, cpu);
- if (!sample)
- return;
- add_event_entry(sample->eip);
- add_event_entry(sample->event);
- }
-
- return;
+ while (op_cpu_buffer_get_data(entry, &val))
+ add_event_entry(val);
}

#endif
@@ -572,10 +545,8 @@ void sync_buffer(int cpu)
add_user_ctx_switch(new, cookie);
}
#ifdef CONFIG_OPROFILE_IBS
- if (flags & IBS_FETCH_BEGIN)
- add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
- if (flags & IBS_OP_BEGIN)
- add_ibs_begin(cpu, IBS_OP_CODE, mm);
+ if (op_cpu_buffer_get_size(&entry))
+ add_data(&entry, mm);
#endif
continue;
}
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 1b65907..ddba9d0 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -363,31 +363,38 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)

#ifdef CONFIG_OPROFILE_IBS

-void oprofile_add_ibs_sample(struct pt_regs * const regs,
- unsigned int * const ibs_sample, int ibs_code)
+/*
+ * Add samples with data to the ring buffer.
+ *
+ * Use op_cpu_buffer_add_data(&entry, val) to add data and
+ * op_cpu_buffer_write_commit(&entry) to commit the sample.
+ */
+void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
+ unsigned long pc, int code, int size)
{
+ struct op_sample *sample;
int is_kernel = !user_mode(regs);
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
- int fail = 0;

cpu_buf->sample_received++;

- /* backtraces disabled for ibs */
- fail = fail || op_add_code(cpu_buf, 0, is_kernel, current);
+ /* no backtraces for samples with data */
+ if (op_add_code(cpu_buf, 0, is_kernel, current))
+ goto fail;

- fail = fail || op_add_sample(cpu_buf, ESCAPE_CODE, ibs_code);
- fail = fail || op_add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
- fail = fail || op_add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
- fail = fail || op_add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);
+ sample = op_cpu_buffer_write_reserve(entry, size + 2);
+ if (!sample)
+ goto fail;
+ sample->eip = ESCAPE_CODE;
+ sample->event = 0; /* no flags */

- if (ibs_code == IBS_OP_BEGIN) {
- fail = fail || op_add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]);
- fail = fail || op_add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]);
- fail = fail || op_add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
- }
+ op_cpu_buffer_add_data(entry, code);
+ op_cpu_buffer_add_data(entry, pc);
+
+ return;

- if (fail)
- cpu_buf->sample_lost_overflow++;
+fail:
+ cpu_buf->sample_lost_overflow++;
}

#endif
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index f343760..525cc4d 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -115,7 +115,5 @@ int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
#define IS_KERNEL (1UL << 1)
#define TRACE_BEGIN (1UL << 2)
#define USER_CTX_SWITCH (1UL << 3)
-#define IBS_FETCH_BEGIN (1UL << 4)
-#define IBS_OP_BEGIN (1UL << 5)

#endif /* OPROFILE_CPU_BUFFER_H */
--
1.6.0.1

Subject: [PATCH 9/9] oprofile: make new cpu buffer functions part of the api

This patch creates the new functions

oprofile_write_reserve()
oprofile_add_data()
oprofile_write_commit()

and makes them part of the oprofile api.

Signed-off-by: Robert Richter <[email protected]>
---
arch/x86/oprofile/op_model_amd.c | 57 ++++++++++++++++----------------------
drivers/oprofile/cpu_buffer.c | 17 +++++++++--
drivers/oprofile/cpu_buffer.h | 8 +----
include/linux/oprofile.h | 18 ++++++++++++
4 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index cf310ae..8fdf06e 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,7 +22,6 @@

#include "op_x86_model.h"
#include "op_counter.h"
-#include "../../../drivers/oprofile/cpu_buffer.h"

#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
@@ -61,14 +60,6 @@ static unsigned long reset_value[NUM_COUNTERS];
#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */

-/*
- * The function interface needs to be fixed, something like add
- * data. Should then be added to linux/oprofile.h.
- */
-extern
-void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
- unsigned long pc, int code, int size);
-
#define IBS_FETCH_SIZE 6
#define IBS_OP_SIZE 12

@@ -174,16 +165,16 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
if (high & IBS_FETCH_HIGH_VALID_BIT) {
rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
- oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE,
- IBS_FETCH_SIZE);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
- op_cpu_buffer_add_data(&entry, low);
- op_cpu_buffer_add_data(&entry, high);
+ oprofile_write_reserve(&entry, regs, msr,
+ IBS_FETCH_CODE, IBS_FETCH_SIZE);
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, low);
+ oprofile_add_data(&entry, high);
rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
- op_cpu_buffer_write_commit(&entry);
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_write_commit(&entry);

/* reenable the IRQ */
high &= ~IBS_FETCH_HIGH_VALID_BIT;
@@ -197,26 +188,26 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsr(MSR_AMD64_IBSOPCTL, low, high);
if (low & IBS_OP_LOW_VALID_BIT) {
rdmsrl(MSR_AMD64_IBSOPRIP, msr);
- oprofile_add_data(&entry, regs, msr, IBS_OP_CODE,
- IBS_OP_SIZE);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ oprofile_write_reserve(&entry, regs, msr,
+ IBS_OP_CODE, IBS_OP_SIZE);
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
rdmsrl(MSR_AMD64_IBSOPDATA, msr);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
- op_cpu_buffer_add_data(&entry, (u32)msr);
- op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
- op_cpu_buffer_write_commit(&entry);
+ oprofile_add_data(&entry, (u32)msr);
+ oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_write_commit(&entry);

/* reenable the IRQ */
high = 0;
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index b846af6..2e03b6d 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -364,10 +364,11 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
/*
* Add samples with data to the ring buffer.
*
- * Use op_cpu_buffer_add_data(&entry, val) to add data and
- * op_cpu_buffer_write_commit(&entry) to commit the sample.
+ * Use oprofile_add_data(&entry, val) to add data and
+ * oprofile_write_commit(&entry) to commit the sample.
*/
-void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
+void
+oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
unsigned long pc, int code, int size)
{
struct op_sample *sample;
@@ -395,6 +396,16 @@ fail:
cpu_buf->sample_lost_overflow++;
}

+int oprofile_add_data(struct op_entry *entry, unsigned long val)
+{
+ return op_cpu_buffer_add_data(entry, val);
+}
+
+int oprofile_write_commit(struct op_entry *entry)
+{
+ return op_cpu_buffer_write_commit(entry);
+}
+
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index 525cc4d..63f81c4 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -35,13 +35,7 @@ struct op_sample {
unsigned long data[0];
};

-struct op_entry {
- struct ring_buffer_event *event;
- struct op_sample *sample;
- unsigned long irq_flags;
- unsigned long size;
- unsigned long *data;
-};
+struct op_entry;

struct oprofile_cpu_buffer {
unsigned long buffer_size;
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 1ce9fe5..1d9518b 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start,
unsigned long oprofile_get_cpu_buffer_size(void);
void oprofile_cpu_buffer_inc_smpl_lost(void);

+/* cpu buffer functions */
+
+struct op_sample;
+
+struct op_entry {
+ struct ring_buffer_event *event;
+ struct op_sample *sample;
+ unsigned long irq_flags;
+ unsigned long size;
+ unsigned long *data;
+};
+
+void oprofile_write_reserve(struct op_entry *entry,
+ struct pt_regs * const regs,
+ unsigned long pc, int code, int size);
+int oprofile_add_data(struct op_entry *entry, unsigned long val);
+int oprofile_write_commit(struct op_entry *entry);
+
#endif /* OPROFILE_H */
--
1.6.0.1

2009-01-14 21:53:45

by Maynard Johnson

[permalink] [raw]
Subject: Re: [PATCH 0/9] oprofile: implement support of samples with variable data size

Robert Richter wrote:
> This patch series implements support of samples with variable data
> size. Current implementation has a fixed sample size of 2 unsigned
> longs. This makes it hard to implement use cases with a different
> sample size (AMD IBS or power pc cell) since buffer locking is
> required or samples my become incomplete. The internal cpu buffer
> usage has been changed now and allows the attachment of data with
> varable length to samples.
>
> There are patches that also change the current ibs implementation. In
> the end the ibs implentation could be removed from the general
> oprofile code to model specific code. An API is available that
> provides generic functions to use variable sample sizes.
>
> The patches are also available here:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git oprofile-for-tip
This patch set looks fine. Nice work! I still need to test on a couple of platforms, though. Feel free to bug me if I don't get back to you with results within a week or so.

-Maynard
>
> -Robert
>
>
>
>
> ------------------------------------------------------------------------------
> Check out the new SourceForge.net Marketplace.
> It is the best place to buy or sell services for
> just about anything Open Source.
> http://p.sf.net/sfu/Xq1LFB
> _______________________________________________
> oprofile-list mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/oprofile-list