2003-08-19 01:24:05

by John Levon

[permalink] [raw]
Subject: [PATCH 1/3] OProfile: reduce allocations of MSR structs


Andi Kleen pointed out the MSRs array was a massive bloat source. Reduce
it somewhat by only allocating the amount actually needed for the CPU type.

Untested on Pentium IV - I don't have a machine.

diff -X dontdiff -Naur linux-cvs/arch/i386/oprofile/nmi_int.c linux-fixes/arch/i386/oprofile/nmi_int.c
--- linux-cvs/arch/i386/oprofile/nmi_int.c 2003-06-18 15:06:05.000000000 +0100
+++ linux-fixes/arch/i386/oprofile/nmi_int.c 2003-08-11 20:40:22.000000000 +0100
@@ -12,6 +12,7 @@
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
+#include <linux/slab.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
@@ -91,24 +92,66 @@
{
unsigned int const nr_ctrs = model->num_counters;
unsigned int const nr_ctrls = model->num_controls;
- struct op_msr_group * counters = &msrs->counters;
- struct op_msr_group * controls = &msrs->controls;
+ struct op_msr * counters = msrs->counters;
+ struct op_msr * controls = msrs->controls;
unsigned int i;

for (i = 0; i < nr_ctrs; ++i) {
- rdmsr(counters->addrs[i],
- counters->saved[i].low,
- counters->saved[i].high);
+ rdmsr(counters[i].addr,
+ counters[i].saved.low,
+ counters[i].saved.high);
}

for (i = 0; i < nr_ctrls; ++i) {
- rdmsr(controls->addrs[i],
- controls->saved[i].low,
- controls->saved[i].high);
+ rdmsr(controls[i].addr,
+ controls[i].saved.low,
+ controls[i].saved.high);
}
}

-
+
+static void free_msrs(void)
+{
+ int i;
+ for (i = 0; i < NR_CPUS; ++i) {
+ kfree(cpu_msrs[i].counters);
+ cpu_msrs[i].counters = NULL;
+ kfree(cpu_msrs[i].controls);
+ cpu_msrs[i].controls = NULL;
+ }
+}
+
+
+static int allocate_msrs(void)
+{
+ int success = 1;
+ size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+ size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+ int i;
+ for (i = 0; i < NR_CPUS; ++i) {
+ if (!cpu_online(i))
+ continue;
+
+ cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
+ if (!cpu_msrs[i].counters) {
+ success = 0;
+ break;
+ }
+ cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
+ if (!cpu_msrs[i].controls) {
+ success = 0;
+ break;
+ }
+ }
+
+ if (!success)
+ free_msrs();
+
+ return success;
+}
+
+
static void nmi_cpu_setup(void * dummy)
{
int cpu = smp_processor_id();
@@ -125,6 +168,9 @@

static int nmi_setup(void)
{
+ if (!allocate_msrs())
+ return -ENOMEM;
+
/* We walk a thin line between law and rape here.
* We need to be careful to install our NMI handler
* without actually triggering any NMIs as this will
@@ -142,20 +188,20 @@
{
unsigned int const nr_ctrs = model->num_counters;
unsigned int const nr_ctrls = model->num_controls;
- struct op_msr_group * counters = &msrs->counters;
- struct op_msr_group * controls = &msrs->controls;
+ struct op_msr * counters = msrs->counters;
+ struct op_msr * controls = msrs->controls;
unsigned int i;

for (i = 0; i < nr_ctrls; ++i) {
- wrmsr(controls->addrs[i],
- controls->saved[i].low,
- controls->saved[i].high);
+ wrmsr(controls[i].addr,
+ controls[i].saved.low,
+ controls[i].saved.high);
}

for (i = 0; i < nr_ctrs; ++i) {
- wrmsr(counters->addrs[i],
- counters->saved[i].low,
- counters->saved[i].high);
+ wrmsr(counters[i].addr,
+ counters[i].saved.low,
+ counters[i].saved.high);
}
}

@@ -185,6 +231,7 @@
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
unset_nmi_callback();
enable_lapic_nmi_watchdog();
+ free_msrs();
}


diff -X dontdiff -Naur linux-cvs/arch/i386/oprofile/op_model_athlon.c linux-fixes/arch/i386/oprofile/op_model_athlon.c
--- linux-cvs/arch/i386/oprofile/op_model_athlon.c 2003-06-15 02:06:38.000000000 +0100
+++ linux-fixes/arch/i386/oprofile/op_model_athlon.c 2003-08-11 20:28:31.000000000 +0100
@@ -20,12 +20,12 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4

-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(unsigned int)(l), -1);} while (0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

-#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
@@ -39,15 +39,15 @@

static void athlon_fill_in_addresses(struct op_msrs * const msrs)
{
- msrs->counters.addrs[0] = MSR_K7_PERFCTR0;
- msrs->counters.addrs[1] = MSR_K7_PERFCTR1;
- msrs->counters.addrs[2] = MSR_K7_PERFCTR2;
- msrs->counters.addrs[3] = MSR_K7_PERFCTR3;
-
- msrs->controls.addrs[0] = MSR_K7_EVNTSEL0;
- msrs->controls.addrs[1] = MSR_K7_EVNTSEL1;
- msrs->controls.addrs[2] = MSR_K7_EVNTSEL2;
- msrs->controls.addrs[3] = MSR_K7_EVNTSEL3;
+ msrs->counters[0].addr = MSR_K7_PERFCTR0;
+ msrs->counters[1].addr = MSR_K7_PERFCTR1;
+ msrs->counters[2].addr = MSR_K7_PERFCTR2;
+ msrs->counters[3].addr = MSR_K7_PERFCTR3;
+
+ msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+ msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+ msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+ msrs->controls[3].addr = MSR_K7_EVNTSEL3;
}


diff -X dontdiff -Naur linux-cvs/arch/i386/oprofile/op_model_p4.c linux-fixes/arch/i386/oprofile/op_model_p4.c
--- linux-cvs/arch/i386/oprofile/op_model_p4.c 2003-08-14 13:21:20.000000000 +0100
+++ linux-fixes/arch/i386/oprofile/op_model_p4.c 2003-08-14 13:19:23.000000000 +0100
@@ -366,8 +366,8 @@
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

@@ -410,7 +410,7 @@

/* the counter registers we pay attention to */
for (i = 0; i < num_counters; ++i) {
- msrs->counters.addrs[i] =
+ msrs->counters[i].addr =
p4_counters[VIRT_CTR(stag, i)].counter_address;
}

@@ -419,42 +419,42 @@
/* 18 CCCR registers */
for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
- msrs->controls.addrs[i] = addr;
+ msrs->controls[i].addr = addr;
}

/* 43 ESCR registers in three discontiguous group */
for (addr = MSR_P4_BSU_ESCR0 + stag;
addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
- msrs->controls.addrs[i] = addr;
+ msrs->controls[i].addr = addr;
}

for (addr = MSR_P4_MS_ESCR0 + stag;
addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
- msrs->controls.addrs[i] = addr;
+ msrs->controls[i].addr = addr;
}

for (addr = MSR_P4_IX_ESCR0 + stag;
addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
- msrs->controls.addrs[i] = addr;
+ msrs->controls[i].addr = addr;
}

/* there are 2 remaining non-contiguously located ESCRs */

if (num_counters == NUM_COUNTERS_NON_HT) {
/* standard non-HT CPUs handle both remaining ESCRs*/
- msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
- msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

} else if (stag == 0) {
/* HT CPUs give the first remainder to the even thread, as
the 32nd control register */
- msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

} else {
/* and two copies of the second to the odd thread,
for the 22st and 23nd control registers */
- msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
- msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
}
}

diff -X dontdiff -Naur linux-cvs/arch/i386/oprofile/op_model_ppro.c linux-fixes/arch/i386/oprofile/op_model_ppro.c
--- linux-cvs/arch/i386/oprofile/op_model_ppro.c 2003-06-15 02:06:38.000000000 +0100
+++ linux-fixes/arch/i386/oprofile/op_model_ppro.c 2003-08-11 20:29:33.000000000 +0100
@@ -20,12 +20,12 @@
#define NUM_COUNTERS 2
#define NUM_CONTROLS 2

-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(u32)(l), -1);} while (0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

-#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
@@ -39,11 +39,11 @@

static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
- msrs->counters.addrs[0] = MSR_P6_PERFCTR0;
- msrs->counters.addrs[1] = MSR_P6_PERFCTR1;
+ msrs->counters[0].addr = MSR_P6_PERFCTR0;
+ msrs->counters[1].addr = MSR_P6_PERFCTR1;

- msrs->controls.addrs[0] = MSR_P6_EVNTSEL0;
- msrs->controls.addrs[1] = MSR_P6_EVNTSEL1;
+ msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+ msrs->controls[1].addr = MSR_P6_EVNTSEL1;
}


diff -X dontdiff -Naur linux-cvs/arch/i386/oprofile/op_x86_model.h linux-fixes/arch/i386/oprofile/op_x86_model.h
--- linux-cvs/arch/i386/oprofile/op_x86_model.h 2003-06-15 02:06:38.000000000 +0100
+++ linux-fixes/arch/i386/oprofile/op_x86_model.h 2003-08-11 20:08:27.000000000 +0100
@@ -11,22 +11,19 @@
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H

-/* Pentium IV needs all these */
-#define MAX_MSR 63
-
struct op_saved_msr {
unsigned int high;
unsigned int low;
};

-struct op_msr_group {
- unsigned int addrs[MAX_MSR];
- struct op_saved_msr saved[MAX_MSR];
+struct op_msr {
+ unsigned long addr;
+ struct op_saved_msr saved;
};

struct op_msrs {
- struct op_msr_group counters;
- struct op_msr_group controls;
+ struct op_msr * counters;
+ struct op_msr * controls;
};

struct pt_regs;


2003-08-19 01:24:11

by John Levon

[permalink] [raw]
Subject: [PATCH 3/3] OProfile: add a useful statistic


Add a stat counting the (relatively common) case where a PC value is logged
but there is no (longer) any executable mapping covering that address.

diff -X dontdiff -Naur linux-cvs/drivers/oprofile/buffer_sync.c linux-fixes/drivers/oprofile/buffer_sync.c
--- linux-cvs/drivers/oprofile/buffer_sync.c 2003-06-18 15:06:09.000000000 +0100
+++ linux-fixes/drivers/oprofile/buffer_sync.c 2003-08-19 01:15:36.000000000 +0100
@@ -308,8 +308,10 @@

cookie = lookup_dcookie(mm, s->eip, &offset);

- if (!cookie)
+ if (!cookie) {
+ atomic_inc(&oprofile_stats.sample_lost_no_mapping);
return;
+ }

if (cookie != last_cookie) {
add_cookie_switch(cookie);
diff -X dontdiff -Naur linux-cvs/drivers/oprofile/oprofile_stats.c linux-fixes/drivers/oprofile/oprofile_stats.c
--- linux-cvs/drivers/oprofile/oprofile_stats.c 2003-05-04 02:42:47.000000000 +0100
+++ linux-fixes/drivers/oprofile/oprofile_stats.c 2003-08-19 01:17:17.000000000 +0100
@@ -32,6 +32,7 @@
}

atomic_set(&oprofile_stats.sample_lost_no_mm, 0);
+ atomic_set(&oprofile_stats.sample_lost_no_mapping, 0);
atomic_set(&oprofile_stats.event_lost_overflow, 0);
}

@@ -70,6 +71,8 @@

oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm",
&oprofile_stats.sample_lost_no_mm);
+ oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mapping",
+ &oprofile_stats.sample_lost_no_mapping);
oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow",
&oprofile_stats.event_lost_overflow);
}
diff -X dontdiff -Naur linux-cvs/drivers/oprofile/oprofile_stats.h linux-fixes/drivers/oprofile/oprofile_stats.h
--- linux-cvs/drivers/oprofile/oprofile_stats.h 2003-05-04 02:42:47.000000000 +0100
+++ linux-fixes/drivers/oprofile/oprofile_stats.h 2003-08-19 01:16:23.000000000 +0100
@@ -14,6 +14,7 @@

struct oprofile_stat_struct {
atomic_t sample_lost_no_mm;
+ atomic_t sample_lost_no_mapping;
atomic_t event_lost_overflow;
};


2003-08-19 01:26:15

by John Levon

[permalink] [raw]
Subject: [PATCH 2/3] OProfile: export kernel pointer size in oprofilefs


Tell user-space how big kernel pointers are, as preferable to sniffing /proc/kcore.
Improve the oprofilefs_ulong_to_user() prototype.

diff -X dontdiff -Naur linux-cvs/drivers/oprofile/oprofile_files.c linux-fixes/drivers/oprofile/oprofile_files.c
--- linux-cvs/drivers/oprofile/oprofile_files.c 2003-06-15 02:06:38.000000000 +0100
+++ linux-fixes/drivers/oprofile/oprofile_files.c 2003-08-14 19:19:05.000000000 +0100
@@ -19,6 +19,17 @@
unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */


+static ssize_t pointer_size_read(struct file * file, char * buf, size_t count, loff_t * offset)
+{
+ return oprofilefs_ulong_to_user((unsigned long)sizeof(void *), buf, count, offset);
+}
+
+
+static struct file_operations pointer_size_fops = {
+ .read = pointer_size_read,
+};
+
+
static ssize_t cpu_type_read(struct file * file, char * buf, size_t count, loff_t * offset)
{
return oprofilefs_str_to_user(oprofile_ops->cpu_type, buf, count, offset);
@@ -32,7 +43,7 @@

static ssize_t enable_read(struct file * file, char * buf, size_t count, loff_t * offset)
{
- return oprofilefs_ulong_to_user(&oprofile_started, buf, count, offset);
+ return oprofilefs_ulong_to_user(oprofile_started, buf, count, offset);
}


@@ -85,6 +96,7 @@
oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size);
oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops);
+ oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);
oprofile_create_stats_files(sb, root);
if (oprofile_ops->create_files)
oprofile_ops->create_files(sb, root);
diff -X dontdiff -Naur linux-cvs/drivers/oprofile/oprofilefs.c linux-fixes/drivers/oprofile/oprofilefs.c
--- linux-cvs/drivers/oprofile/oprofilefs.c 2003-05-26 02:04:50.000000000 +0100
+++ linux-fixes/drivers/oprofile/oprofilefs.c 2003-08-14 19:21:16.000000000 +0100
@@ -69,7 +69,7 @@

#define TMPBUFSIZE 50

-ssize_t oprofilefs_ulong_to_user(unsigned long * val, char * buf, size_t count, loff_t * offset)
+ssize_t oprofilefs_ulong_to_user(unsigned long val, char * buf, size_t count, loff_t * offset)
{
char tmpbuf[TMPBUFSIZE];
size_t maxlen;
@@ -78,7 +78,7 @@
return 0;

spin_lock(&oprofilefs_lock);
- maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", *val);
+ maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val);
spin_unlock(&oprofilefs_lock);
if (maxlen > TMPBUFSIZE)
maxlen = TMPBUFSIZE;
@@ -122,7 +122,8 @@

static ssize_t ulong_read_file(struct file * file, char * buf, size_t count, loff_t * offset)
{
- return oprofilefs_ulong_to_user(file->private_data, buf, count, offset);
+ unsigned long * val = file->private_data;
+ return oprofilefs_ulong_to_user(*val, buf, count, offset);
}


@@ -212,9 +213,8 @@

static ssize_t atomic_read_file(struct file * file, char * buf, size_t count, loff_t * offset)
{
- atomic_t * aval = file->private_data;
- unsigned long val = atomic_read(aval);
- return oprofilefs_ulong_to_user(&val, buf, count, offset);
+ atomic_t * val = file->private_data;
+ return oprofilefs_ulong_to_user(atomic_read(val), buf, count, offset);
}


diff -X dontdiff -Naur linux-cvs/include/linux/oprofile.h linux-fixes/include/linux/oprofile.h
--- linux-cvs/include/linux/oprofile.h 2003-06-15 02:06:38.000000000 +0100
+++ linux-fixes/include/linux/oprofile.h 2003-08-14 19:19:42.000000000 +0100
@@ -92,7 +92,7 @@
* Convert an unsigned long value into ASCII and copy it to the user buffer @buf,
* updating *offset appropriately. Returns bytes written or -EFAULT.
*/
-ssize_t oprofilefs_ulong_to_user(unsigned long * val, char * buf, size_t count, loff_t * offset);
+ssize_t oprofilefs_ulong_to_user(unsigned long val, char * buf, size_t count, loff_t * offset);

/**
* Read an ASCII string for a number from a userspace buffer and fill *val on success.