2004-06-22 15:34:57

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [0/23] mmap() support for /proc/profile

/*
I was trying to profile a mostly-idle workload to get an idea of what
area of the kernel things were diving into and falling asleep in during
an OAST run. Without these patches, kerneltop et al showed heavy /proc/
activity along with copy_to_user() at the top of the profiles.

With these patches in place, kernel participation in profile data
movement was greatly reduced, and the profile showed very meaningful
try_atomic_semop() as the area of the kernel being exercised, which
indicated contention for a sysv semaphore originating from userspace.

The program below was used to report the data. Patches vs. 2.6.7-final.


-- wli
*/

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>
#include <string.h>
#include <curses.h>
#include <signal.h>
#include <time.h>
#include <getopt.h>
#include <limits.h>
#include <termios.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/utsname.h>

/* Size of the scratch buffers used for input, System.map lines and ctime. */
#define LINEBUF_SIZE 256
/* Failure codes returned by prepare_to_profile(), one per setup stage. */
#define SYSTEM_MAP_ERROR 1
#define PROC_PROFILE_ERROR 2
#define INTERVAL_TIMER_ERROR 3
#define TCGETATTR_ERROR 4
#define NCURSES_ERROR 5
/*
 * Number of elements in a real array (not a pointer), as an int.
 * The argument is parenthesised so that any expression may be passed.
 */
#define ARRAY_SIZE(x) ((int)(sizeof(x)/sizeof((x)[0])))

/*
 * One text symbol read from the System.map file.  Nodes are chained into a
 * singly linked list by push_vaddr() and consumed by prof_tabulate_syms().
 */
struct ksym {
u_int64_t vaddr; /* address parsed from the hexadecimal column */
char *s; /* heap-allocated copy of the symbol name */
struct ksym *next; /* next node in the list, NULL at the tail */
};

/*
 * Entry of the flat symbol table that prof_tabulate_syms() builds from the
 * ksym list; profile_hit() keeps the per-symbol hit counters here.
 */
struct sym {
	u_int64_t vaddr;	/* start address of the symbol */
	u_int32_t cur_hits;	/* hits seen since the previous sample */
	u_int32_t cum_hits;	/* hits accounted for by earlier samples */
	char *s;		/* symbol name */
};

/* Everything the profiler keeps between samples. */
struct prof_state {
	WINDOW *window;			/* value returned by initscr() */
	struct ksym *ksyms;		/* head of the parsed symbol list */
	struct ksym *end_ksym;		/* tail of the parsed symbol list */
	struct sym *symtab;		/* flat table built from the list */
	struct sym *idle_sym;		/* symtab entry named "default_idle" */
	char *strtab;			/* storage for all symtab names */
	char *mapfile;			/* path of the System.map file */
	u_int32_t *profile;		/* mmap()ed /proc/profile contents */
	u_int64_t start_vaddr;		/* address of the "stext" symbol */
	u_int64_t end_vaddr;		/* address of "__sched_text_end" */
	size_t profile_size;		/* length of the profile mapping */
	int nsyms;			/* number of symbols collected */
	int tot_strlen;			/* bytes needed for every name + NUL */
	int delay;			/* refresh interval in seconds */
	int iofd;			/* dup() of stdin used for async input */
	int idle;			/* non-zero: show the idle symbol too */
	struct termios saved_termios[3];	/* stdin, stdout, stderr */
};

/* Path of the kernel profile buffer that gets mapped. */
static const char proc_profile[] = "/proc/profile";
/* uname() result shown in the header line, and the summed field lengths. */
static struct utsname utsname;
static size_t uts_len = 0;
/*
 * Flags shared between the signal handlers and the main loop.  They are
 * written from signal context, so they must be volatile sig_atomic_t for
 * the main loop to observe the updates reliably.
 */
static volatile sig_atomic_t user_interrupt = 0, input_ready = 0,
			     tick_pending = 0, ticks = 0;

/* State allocation, setup and teardown. */
static struct prof_state *alloc_prof_state(void);
static int prepare_to_profile(struct prof_state *state, int argc, char *argv[]);
static void cleanup_state(struct prof_state **state);

/* System.map parsing and symbol table construction. */
static int parse_system_map(struct prof_state *state, int argc, char *argv[]);
static int push_vaddr(struct prof_state *state, u_int64_t vaddr, const char *s);
static int prof_tabulate_syms(struct prof_state *state);
static void cleanup_system_map(struct prof_state *state);

/* Mapping of /proc/profile. */
static int map_proc_profile(struct prof_state *state, int argc, char *argv[]);
static void unmap_proc_profile(struct prof_state *state);

/* Timer and signal plumbing. */
static int setup_interval_timer(struct prof_state *state, int argc, char *argv[]);
static void cleanup_interval_timer(struct prof_state *state);

/* Main loop steps. */
static int wait_for_readiness(struct prof_state *state);
static int user_input(struct prof_state *state);
static int display_profile(struct prof_state *state);

/*
 * Entry point.
 *
 * Options:
 *   -d <seconds>  refresh interval (must be > 0 and < INT_MAX, default 1)
 *   -m <path>     System.map file to use instead of the default
 *
 * Returns 0 on a normal exit and a non-zero code on failure.
 */
int main(int argc, char *argv[])
{
	int c, rc, err = 0;
	long delay;
	struct prof_state *state = alloc_prof_state();

	if (state == NULL)
		return errno ? errno : ENOMEM;
	while ((c = getopt(argc, argv, "d:m:")) != -1) {
		switch (c) {
		case 'd':
			/* strtol() only sets errno on failure, so clear it first. */
			errno = 0;
			delay = strtol(optarg, NULL, 0);
			if (errno == 0 && delay < INT_MAX && delay > 0)
				state->delay = delay;
			else {
				/*
				 * An in-range parse that is merely <= 0 leaves
				 * errno at 0; report EINVAL so that the message
				 * is meaningful and the exit status is non-zero.
				 */
				err = errno ? errno : EINVAL;
				errno = err;
				perror("bad delay value");
				goto out_err;
			}
			break;
		case 'm':
			if ((state->mapfile = strdup(optarg)) == NULL) {
				err = errno ? errno : ENOMEM;
				goto out_err;
			}
			break;
		default:
			break;
		}
	}
	if (!state->delay)
		state->delay = 1;
	rc = prepare_to_profile(state, argc, argv);
	if (rc) {
		/* Prefer errno, but never report success after a failure. */
		err = errno ? errno : rc;
		goto cleanup_state;
	}
	/* One iteration per delivered signal: handle keys, then redraw. */
	while (!wait_for_readiness(state)) {
		if (user_input(state))
			break;
		else if (display_profile(state))
			break;
	}
cleanup_state:
	cleanup_state(&state);
out_err:
	return err;
}

/*
 * Drain pending keyboard input and act on it.
 *
 * Keys: 'q'/'Q' quit, 'i' toggles display of the idle symbol, 'w' forces a
 * redraw, '1'..'9' change the refresh interval to that many seconds.
 *
 * Returns 1 when the user asked to quit, 0 otherwise.
 */
static int user_input(struct prof_state *state)
{
	static char buf[LINEBUF_SIZE];
	int n, k;

	if (!input_ready)
		return 0;
	input_ready = 0;
	/* NOTE(review): relies on stdin being non-blocking; see setup_interval_timer(). */
	while ((n = read(STDIN_FILENO, buf, sizeof(buf))) > 0) {
		for (k = 0; k < n; ++k) {
			/* <ctype.h> functions need an unsigned char value. */
			unsigned char ch = (unsigned char)buf[k];

			if (ch == 'q' || ch == 'Q')
				return 1;
			else if (ch == 'i')
				state->idle = !state->idle;
			else if (ch == 'w')
				tick_pending = 1;
			else if (isdigit(ch) && ch != '0') {
				struct itimerval new, old;

				state->delay = ch - '0';
				if (getitimer(ITIMER_REAL, &old))
					return 0;
				new.it_interval.tv_sec = state->delay;
				new.it_interval.tv_usec = 0;
				/*
				 * Keep the time left on the running timer if it
				 * expires sooner than the new interval would.
				 */
				if (old.it_value.tv_sec < state->delay)
					new.it_value = old.it_value;
				else {
					new.it_value.tv_sec = state->delay;
					new.it_value.tv_usec = 0;
				}
				setitimer(ITIMER_REAL, &new, NULL);
			}
		}
	}
	return 0;
}

/*
 * Run every setup stage in order: save the terminal settings of stdin,
 * stdout and stderr, start curses, parse System.map, map /proc/profile and
 * arm the interval timer.
 *
 * Returns 0 on success or the *_ERROR code of the stage that failed.  When
 * the mapping or timer stage fails, the stages completed after curses
 * start-up are undone before returning.
 */
static int prepare_to_profile(struct prof_state *state, int argc, char *argv[])
{
	static const int tty_fds[3] = {
		STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO
	};
	int i;

	for (i = 0; i < 3; ++i) {
		if (tcgetattr(tty_fds[i], &state->saved_termios[i]))
			return TCGETATTR_ERROR;
	}

	state->window = initscr();
	if (!state->window)
		return NCURSES_ERROR;

	if (parse_system_map(state, argc, argv))
		return SYSTEM_MAP_ERROR;

	if (map_proc_profile(state, argc, argv)) {
		cleanup_system_map(state);
		return PROC_PROFILE_ERROR;
	}

	if (setup_interval_timer(state, argc, argv)) {
		unmap_proc_profile(state);
		cleanup_system_map(state);
		return INTERVAL_TIMER_ERROR;
	}

	return 0;
}

/*
 * Read the System.map file and collect every 't'/'T' symbol.
 *
 * When no map file was given, "/boot/System.map-<release>" is used, with
 * <release> taken from uname().  Each line is expected to look like
 * "<hex address> <type> <name>\n" and must fit into LINEBUF_SIZE bytes.
 * Accepted symbols are queued with push_vaddr() and finally turned into the
 * flat table by prof_tabulate_syms().
 *
 * The map file name is released before returning, on success and on failure.
 *
 * Returns 0 on success and a non-zero errno-style value on failure.
 */
static int parse_system_map(struct prof_state *state, int argc, char *argv[])
{
	int fd, err = 0;
	size_t len;
	struct stat stbuf;
	char buf[LINEBUF_SIZE];
	off_t pos, new_pos;

	(void)argc;
	(void)argv;
	if (uname(&utsname))
		return errno;
	uts_len = strlen(utsname.sysname) + strlen(utsname.release)
		+ strlen(utsname.nodename);
	len = strlen("/boot/System.map-") + strlen(utsname.release) + 1;
	if (state->mapfile == NULL) {
		if ((state->mapfile = malloc(len)) == NULL)
			return errno ? errno : ENOMEM;
		memset(state->mapfile, 0, len);
		err = snprintf(state->mapfile, len, "/boot/System.map-%s",
				utsname.release);
		if (err == (int)(len-1))
			err = 0;
		else {
			err = ENOENT;
			goto free_mapfile;
		}
	}
	fd = open(state->mapfile, O_RDONLY);
	if (fd < 0) {
		err = errno;
		goto free_mapfile;
	}
	if (fstat(fd, &stbuf)) {
		err = errno;
		goto close_fd;
	}
	pos = 0;
	while (pos < stbuf.st_size) {
		int m, k;
		ssize_t nread;
		u_int64_t vaddr;

		/*
		 * Only the bytes actually read may be scanned: a short read
		 * near the end of the file would otherwise expose stale data
		 * from the previous line (or uninitialised memory).
		 */
		nread = pread(fd, buf, sizeof(buf), pos);
		if (nread < 0) {
			err = errno;
			goto close_fd;
		}
		for (new_pos = k = 0; k < nread; ++k) {
			if (buf[k] == '\n') {
				new_pos = pos + k + 1;
				break;
			}
		}
		/* No newline in the window: over-long or truncated line. */
		if (new_pos <= pos) {
			err = ENOENT;
			goto close_fd;
		}
		/* Column 1: the address, in hexadecimal. */
		vaddr = 0;
		for (m = 0; m < k; ++m) {
			unsigned char ch = (unsigned char)buf[m];

			if (ch == ' ')
				break;
			else if (!isxdigit(ch)) {
				err = EINVAL;
				goto close_fd;
			}
			vaddr <<= 4;
			if (ch >= 'a' && ch <= 'f')
				vaddr += (int)(ch - 'a') + 10;
			else if (ch >= 'A' && ch <= 'F')
				vaddr += (int)(ch - 'A') + 10;
			else
				vaddr += (int)(ch - '0');
		}
		/* Column 2: the symbol type; only text symbols are kept. */
		while (m < k && buf[m] == ' ')
			++m;
		if (m == k || (buf[m] != 't' && buf[m] != 'T'))
			goto new_pos;
		++m;
		/* Column 3: the name, terminated by the newline at buf[k]. */
		while (m < k && buf[m] == ' ')
			++m;
		if (m == k || buf[m] == '\n')
			goto new_pos;
		if (push_vaddr(state, vaddr, &buf[m])) {
			err = ENOMEM;
			goto close_fd;
		}

new_pos:
		pos = new_pos;
	}
	err = prof_tabulate_syms(state);
close_fd:
	close(fd);
free_mapfile:
	free(state->mapfile);
	/* Do not leave a dangling pointer behind in the shared state. */
	state->mapfile = NULL;
	return err;
}

/*
 * Allocate a profiler state with every byte cleared.
 * Returns NULL when the allocation fails.
 */
static struct prof_state *alloc_prof_state(void)
{
	struct prof_state *fresh;

	fresh = malloc(sizeof(*fresh));
	if (fresh == NULL)
		return NULL;
	memset(fresh, 0, sizeof(*fresh));
	return fresh;
}

/*
 * Append one symbol to the tail of the state's ksym list.
 *
 * vaddr is the symbol address; s points at the symbol name, which ends at
 * the first newline (or NUL) and is copied.  The "stext" and
 * "__sched_text_end" symbols also set the start and end of the profiled
 * address range.
 *
 * Returns 0 on success and a non-zero errno value when out of memory.
 */
static int push_vaddr(struct prof_state *state, u_int64_t vaddr, const char *s)
{
	struct ksym *ksym = malloc(sizeof(*ksym));
	int saved_errno;

	if (ksym == NULL)
		return errno ? errno : ENOMEM;
	/* strcspn() also stops at a NUL, so the scan cannot run away. */
	ksym->s = strndup(s, strcspn(s, "\n"));
	if (ksym->s == NULL) {
		/* Capture errno first: free() is allowed to modify it. */
		saved_errno = errno ? errno : ENOMEM;
		free(ksym);
		return saved_errno;
	}
	ksym->vaddr = vaddr;
	ksym->next = NULL;
	state->tot_strlen += strlen(ksym->s) + 1;
	if (!strcmp(ksym->s, "stext"))
		state->start_vaddr = vaddr;
	else if (!strcmp(ksym->s, "__sched_text_end"))
		state->end_vaddr = vaddr;
	if (state->end_ksym == NULL)
		state->ksyms = state->end_ksym = ksym;
	else {
		state->end_ksym->next = ksym;
		state->end_ksym = ksym;
	}
	state->nsyms++;
	return 0;
}

/*
 * Free whatever is left of the ksym list, decrementing the symbol count
 * once per released node, and leave the list empty.
 */
static void cleanup_system_map(struct prof_state *state)
{
	struct ksym *node, *following;

	for (node = state->ksyms; node != NULL; node = following) {
		following = node->next;
		free(node->s);
		free(node);
		state->nsyms--;
	}
	state->ksyms = NULL;
	state->end_ksym = NULL;
}

/*
 * Convert the ksym list into the flat symbol table.
 *
 * Every name is copied into one shared string buffer, the entry named
 * "default_idle" is remembered in idle_sym, and each list node is freed as
 * it is consumed, leaving the list empty.
 *
 * Returns 0 on success and a non-zero errno value when out of memory, in
 * which case symtab and strtab are both left NULL.
 */
static int prof_tabulate_syms(struct prof_state *state)
{
	int sym_pos = 0, str_pos = 0;
	int saved_errno;

	/* Ask for at least one element so an empty map is not an error. */
	state->symtab = calloc(state->nsyms > 0 ? state->nsyms : 1,
				sizeof(struct sym));
	if (state->symtab == NULL)
		return errno ? errno : ENOMEM;
	state->strtab = malloc(state->tot_strlen > 0 ? state->tot_strlen : 1);
	if (state->strtab == NULL) {
		/* Capture errno first: free() is allowed to modify it. */
		saved_errno = errno ? errno : ENOMEM;
		free(state->symtab);
		/* Do not leave a dangling pointer behind in the shared state. */
		state->symtab = NULL;
		return saved_errno;
	}
	while (state->ksyms) {
		struct ksym *ksym = state->ksyms;

		state->symtab[sym_pos].s = &state->strtab[str_pos];
		strcpy(state->symtab[sym_pos].s, ksym->s);
		str_pos += strlen(ksym->s) + 1;
		state->symtab[sym_pos].vaddr = ksym->vaddr;
		if (!strcmp(state->symtab[sym_pos].s, "default_idle"))
			state->idle_sym = &state->symtab[sym_pos];
		sym_pos++;

		state->ksyms = ksym->next;
		free(ksym->s);
		free(ksym);
	}
	state->end_ksym = NULL;
	return 0;
}

static int profile_hit(struct prof_state *state, int n, int queue[], int sym)
{
int m, k;
u_int64_t vaddr, end;
u_int32_t hits = 0;

if (state->symtab[sym].vaddr <= state->start_vaddr)
return 0;
if (state->symtab[sym].vaddr >= state->end_vaddr)
return 0;
if (sym == state->nsyms - 1)
end = state->end_vaddr;
else
end = state->symtab[sym + 1].vaddr;
k = 1+(state->symtab[sym].vaddr - state->start_vaddr)/state->profile[0];
state->symtab[sym].cum_hits += state->symtab[sym].cur_hits;

for (vaddr = state->symtab[sym].vaddr; vaddr < end; vaddr += state->profile[0], ++k)
hits += state->profile[k];
if (hits >= state->symtab[sym].cum_hits)
state->symtab[sym].cur_hits = hits - state->symtab[sym].cum_hits;
else {
state->symtab[sym].cur_hits = 0;
}
if (!state->symtab[sym].cur_hits)
return 0;
if (!state->idle && &state->symtab[sym] == state->idle_sym)
return 0;
for (k = -1, m = 0; m < n; ++m) {
if (queue[m] < 0) {
k = m;
break;
} else if (k < 0) {
if (state->symtab[queue[m]].cur_hits
< state->symtab[sym].cur_hits)
k = m;
} else if (k >= 0) {
if (state->symtab[queue[m]].cur_hits
< state->symtab[queue[k]].cur_hits)
k = m;
}
}
if (k >= 0)
queue[k] = sym;
return state->symtab[sym].cur_hits;
}

/*
 * Map /proc/profile read-only into the process.
 *
 * The mapping length is the file size rounded up to a whole number of
 * pages.  On success state->profile and state->profile_size describe the
 * mapping; on failure both are cleared.
 *
 * Returns 0 on success and the errno of the failing call otherwise.
 */
static int map_proc_profile(struct prof_state *state, int argc, char *argv[])
{
	struct stat st;
	size_t size, pagesz;
	int fd, err = 0;

	(void)argc;
	(void)argv;
	fd = open(proc_profile, O_RDONLY);
	if (fd < 0)
		return errno;
	if (fstat(fd, &st)) {
		err = errno;
		goto close_fd;
	}
	pagesz = (size_t)getpagesize();
	size = ((size_t)st.st_size + pagesz - 1) & ~(pagesz - 1);
	state->profile = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
	if (state->profile == MAP_FAILED) {
		/* Save errno before close() gets a chance to change it. */
		err = errno;
		state->profile = NULL;
		state->profile_size = 0;
		goto close_fd;
	}
	state->profile_size = size;
close_fd:
	/* The descriptor is not needed once the mapping exists. */
	close(fd);
	return err;
}

/*
 * Remove the /proc/profile mapping, if there is one.
 *
 * The pointer and size are cleared afterwards so that a second call is a
 * no-op.  The timer-failure path unmaps once in prepare_to_profile() and
 * again in cleanup_state(); without the reset the same range would be
 * unmapped twice.
 */
static void unmap_proc_profile(struct prof_state *state)
{
	if (state->profile != NULL && state->profile != MAP_FAILED)
		munmap(state->profile, state->profile_size);
	state->profile = NULL;
	state->profile_size = 0;
}

/*
 * Sleep in pause() until a signal has been handled.
 *
 * Returns 0, with errno cleared, when the main loop should carry on, and
 * EAGAIN once the interrupt flag has been raised.  Should pause() ever
 * return something other than -1, errno is returned.
 */
static int wait_for_readiness(struct prof_state *state)
{
	(void)state;

	if (pause() != -1)
		return errno;
	if (user_interrupt)
		return EAGAIN;

	errno = 0;
	return 0;
}

/*
 * Redraw the screen when a tick is pending.
 *
 * Every symbol is passed through profile_hit(), which refreshes its
 * counters and fills a queue with the busiest symbols.  The queue is then
 * sorted by descending hits and printed below a header line, one row per
 * symbol, using the screen lines that remain.
 *
 * Always returns 0.
 */
static int display_profile(struct prof_state *state)
{
	/* A variable-length array must have a positive size. */
	int nslots = LINES > 4 ? LINES - 4 : 1;
	int queue[nslots];
	int victim, m, k, hits = 0;
	time_t now;
	static char timestr[LINEBUF_SIZE];

	if (!tick_pending)
		return 0;
	tick_pending = 0;
	clrtobot();
	move(0, 0);
	for (k = 0; k < nslots; ++k)
		queue[k] = -1;
	/* All symbols must be visited so that their counters stay current. */
	for (k = 0; k < state->nsyms; ++k)
		hits += profile_hit(state, nslots, queue, k);
	time(&now);
	ctime_r(&now, timestr);
	/* The '*' field width has to be an int, not a size_t expression. */
	printw("%s: %s-%s %*s", utsname.nodename, utsname.sysname,
		utsname.release, COLS - (int)uts_len - 4, timestr);
	printw("%8d profile hits registered, step %d\n\n", hits, ticks);
	attron(A_STANDOUT);
	printw("%8s\t%9s\t%*s\n", "TICKS", "%SYS", 33 - COLS, "FUNCTION");
	attroff(A_STANDOUT);
	/* Selection sort, largest hit count first. */
	for (m = 0; m < nslots - 1; ++m) {
		if (queue[m] < 0)
			break;
		for (k = m + 1, victim = -1; k < nslots; ++k) {
			if (queue[k] < 0)
				break;
			if (victim < 0 && state->symtab[queue[m]].cur_hits
					< state->symtab[queue[k]].cur_hits)
				victim = k;
			else if (victim >= 0 &&
				state->symtab[queue[victim]].cur_hits
					< state->symtab[queue[k]].cur_hits)
				victim = k;
		}
		if (victim != -1) {
			k = queue[victim];
			queue[victim] = queue[m];
			queue[m] = k;
		}
	}
	for (k = 0; k < nslots; ++k) {
		if (queue[k] < 0)
			continue;
		printw("%8lu\t%8.4f%%\t%s\n",
			(unsigned long)state->symtab[queue[k]].cur_hits,
			hits ? 100.0*((double)state->symtab[queue[k]].cur_hits/(double)hits) : 0.0,
			state->symtab[queue[k]].s);
	}
	move(2, 0);
	refresh();
	return 0;
}

/*
 * Tear everything down and free the state.
 *
 * Stops the timer, releases the symbol list and the profile mapping, closes
 * the duplicated input descriptor, ends curses and restores the terminal
 * settings saved for stdin, stdout and stderr, with BRKINT and ONLCR forced
 * on and ICRNL forced off.  *state is set to NULL afterwards.
 *
 * Safe to call after a partially failed setup.
 */
static void cleanup_state(struct prof_state **state)
{
	static int fds[3] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	struct prof_state *st;
	int k;

	if (state == NULL || *state == NULL)
		return;
	st = *state;
	cleanup_interval_timer(st);
	cleanup_system_map(st);
	unmap_proc_profile(st);
	/*
	 * iofd is 0 until dup() has run.  Closing it then would close stdin
	 * itself and make the tcsetattr() on stdin below fail.
	 */
	if (st->iofd > STDIN_FILENO)
		close(st->iofd);
	/* Only shut curses down if initscr() actually succeeded. */
	if (st->window != NULL) {
		refresh();
		endwin();
	}
	for (k = 0; k <= 2; ++k) {
		st->saved_termios[k].c_iflag |= BRKINT;
		st->saved_termios[k].c_iflag &= ~ICRNL;
		st->saved_termios[k].c_oflag |= ONLCR;
		if (tcsetattr(fds[k], TCSANOW, &st->saved_termios[k]) < 0)
			fprintf(stderr, "tcsetattr failed\n");
	}
	free(st);
	*state = NULL;
}

/* SIGALRM handler: count the tick and request a redraw. */
static void action(int sig, siginfo_t *info, void *ucontext)
{
	(void)ucontext;
	(void)info;
	(void)sig;

	ticks += 1;
	tick_pending = 1;
}

/* SIGINT handler: raise the flag that makes the main loop stop. */
static void quit(int sig, siginfo_t *info, void *ucontext)
{
	(void)ucontext;
	(void)info;
	(void)sig;

	user_interrupt = 1;
}

/* SIGUSR1 handler: note that keyboard input is waiting to be read. */
static void input(int sig, siginfo_t *info, void *ucontext)
{
	(void)ucontext;
	(void)info;
	(void)sig;

	input_ready = 1;
}

static int setup_interval_timer(struct prof_state *state, int argc, char *argv[])
{
struct itimerval itimer = {
.it_interval = { .tv_sec = state->delay, .tv_usec = 0, },
.it_value = { .tv_sec = 0, .tv_usec = 100*1000, },
};
struct sigaction act = {
.sa_sigaction = action,
.sa_flags = SA_SIGINFO | SA_RESTART,
};

(void)argc;
(void)argv;
sigemptyset(&act.sa_mask);
if (sigaction(SIGALRM, &act, NULL))
return errno;
else if (setitimer(ITIMER_REAL, &itimer, NULL))
return errno;
act.sa_sigaction = quit;
if (sigaction(SIGINT, &act, NULL))
return errno;
act.sa_sigaction = input;
if (sigaction(SIGUSR1, &act, NULL))
return errno;
if ((state->iofd = dup(STDIN_FILENO)) < 0)
return errno;
else if (fcntl(state->iofd, F_SETFL, O_RDONLY|O_ASYNC|O_NONBLOCK) < 0)
return errno;
else if (fcntl(state->iofd, F_SETOWN, getpid()) < 0)
return errno;
else if (fcntl(state->iofd, F_SETSIG, SIGUSR1) < 0)
return errno;
return 0;
}

static void cleanup_interval_timer(struct prof_state *state)
{
struct itimerval itimer;

(void)state;
if (!getitimer(ITIMER_REAL, &itimer)) {
memset(&itimer.it_value, 0, sizeof(struct timeval));
setitimer(ITIMER_REAL, &itimer, NULL);
}
}


2004-06-22 15:30:15

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [11/23] alpha profiling cleanups

Convert alpha to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/alpha/kernel/irq_impl.h
===================================================================
--- prof-2.6.7.orig/arch/alpha/kernel/irq_impl.h 2004-06-15 22:20:26.000000000 -0700
+++ prof-2.6.7/arch/alpha/kernel/irq_impl.h 2004-06-22 07:25:52.346205336 -0700
@@ -46,26 +46,13 @@
static inline void
alpha_do_profile(unsigned long pc)
{
- extern char _stext;
-
- if (!prof_buffer)
+ if (!profiling_on())
return;

/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
*/
- if (!((1<<smp_processor_id()) & prof_cpu_mask))
- return;
-
- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (pc > prof_len - 1)
- pc = prof_len - 1;
- atomic_inc((atomic_t *)&prof_buffer[pc]);
+ if ((1<<smp_processor_id()) & prof_cpu_mask)
+ profile_tick(pc);
}

2004-06-22 15:30:12

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [6/23] sparc32 profiling cleanups

Convert sparc32 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/sparc/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/sparc/kernel/time.c 2004-06-15 22:19:36.000000000 -0700
+++ prof-2.6.7/arch/sparc/kernel/time.c 2004-06-22 07:25:48.065856048 -0700
@@ -84,7 +84,7 @@
/* 32-bit Sparc specific profiling function. */
void sparc_do_profile(unsigned long pc, unsigned long o7)
{
- if(prof_buffer && current->pid) {
+ if(profiling_on() && current->pid) {
extern int _stext;
extern int __copy_user_begin, __copy_user_end;
extern int __atomic_begin, __atomic_end;
@@ -101,14 +101,8 @@
pc < (unsigned long) &__bitops_end))
pc = o7;

- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
-
spin_lock(&ticker_lock);
- if(pc < prof_len)
- prof_buffer[pc]++;
- else
- prof_buffer[prof_len - 1]++;
+ profile_tick(pc);
spin_unlock(&ticker_lock);
}
}

2004-06-22 15:39:10

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [3/23] mips profiling cleanups

Convert MIPS to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/mips/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/mips/kernel/time.c 2004-06-15 22:19:22.000000000 -0700
+++ prof-2.6.7/arch/mips/kernel/time.c 2004-06-22 07:25:45.377264776 -0700
@@ -24,6 +24,7 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/profile.h>

#include <asm/bootinfo.h>
#include <asm/cpu.h>
@@ -417,22 +418,8 @@
*/
void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- if (!user_mode(regs)) {
- if (prof_buffer && current->pid) {
- unsigned long pc = regs->cp0_epc;
-
- pc -= (unsigned long) _stext;
- pc >>= prof_shift;
- /*
- * Dont ignore out-of-bounds pc values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (pc > prof_len - 1)
- pc = prof_len - 1;
- atomic_inc((atomic_t *)&prof_buffer[pc]);
- }
- }
+ if (!user_mode(regs) && profiling_on() && current->pid)
+ profile_tick(regs->cp0_epc);

#ifdef CONFIG_SMP
/* in UP mode, update_process_times() is invoked by do_timer() */

2004-06-22 15:30:34

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [4/23] sparc64 profiling cleanups

Convert sparc64 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/sparc64/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/sparc64/kernel/time.c 2004-06-15 22:19:23.000000000 -0700
+++ prof-2.6.7/arch/sparc64/kernel/time.c 2004-06-22 07:25:46.238133904 -0700
@@ -29,6 +29,7 @@
#include <linux/jiffies.h>
#include <linux/cpufreq.h>
#include <linux/percpu.h>
+#include <linux/profile.h>

#include <asm/oplib.h>
#include <asm/mostek.h>
@@ -451,7 +452,7 @@
if (user_mode(regs))
return;

- if (!prof_buffer)
+ if (!profiling_on())
return;

{
@@ -472,13 +473,7 @@
(pc >= (unsigned long) &__bitops_begin &&
pc < (unsigned long) &__bitops_end))
pc = o7;
-
- pc -= (unsigned long) _stext;
- pc >>= prof_shift;
-
- if(pc >= prof_len)
- pc = prof_len - 1;
- atomic_inc((atomic_t *)&prof_buffer[pc]);
+ profile_tick(pc);
}
}

2004-06-22 15:43:07

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [2/23] ppc32 profiling cleanups

Convert ppc32 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/ppc/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/ppc/kernel/time.c 2004-06-15 22:19:44.000000000 -0700
+++ prof-2.6.7/arch/ppc/kernel/time.c 2004-06-22 07:25:44.334423312 -0700
@@ -56,6 +56,7 @@
#include <linux/mc146818rtc.h>
#include <linux/time.h>
#include <linux/init.h>
+#include <linux/profile.h>

#include <asm/segment.h>
#include <asm/io.h>
@@ -108,14 +109,10 @@
}

extern unsigned long prof_cpu_mask;
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-extern char _stext;

static inline void ppc_do_profile (unsigned long nip)
{
- if (!prof_buffer)
+ if (!profiling_on())
return;

/*
@@ -124,17 +121,7 @@
*/
if (!((1<<smp_processor_id()) & prof_cpu_mask))
return;
-
- nip -= (unsigned long) &_stext;
- nip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds EIP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (nip > prof_len-1)
- nip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[nip]);
+ profile_tick(nip);
}

/*
Index: prof-2.6.7/include/linux/profile.h
===================================================================
--- prof-2.6.7.orig/include/linux/profile.h 2004-06-22 07:25:43.260586560 -0700
+++ prof-2.6.7/include/linux/profile.h 2004-06-22 07:25:44.336423008 -0700
@@ -14,6 +14,8 @@
/* init basic kernel profiler */
void __init profile_init(void);
void create_proc_profile(void);
+void profile_tick(unsigned long);
+int profiling_on(void);

extern unsigned int * prof_buffer;
extern unsigned long prof_len;
Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 07:25:43.258586864 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:25:44.339422552 -0700
@@ -42,6 +42,23 @@
prof_buffer = (unsigned int *) alloc_bootmem(size);
}

+int profiling_on(void)
+{
+ return !!prof_on;
+}
+
+void profile_tick(unsigned long pc)
+{
+ atomic_t *count;
+ unsigned long idx;
+
+ if (!prof_on)
+ return;
+ idx = (pc - (unsigned long)_stext) >> prof_shift;
+ count = (atomic_t *)&prof_buffer[min(idx, prof_len - 1)];
+ atomic_inc(count);
+}
+
/* Profile event notifications */

#ifdef CONFIG_PROFILING

2004-06-22 15:43:06

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [5/23] m68knommu profiling cleanups

Convert m68knommu to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/m68knommu/platform/5307/timers.c
===================================================================
--- prof-2.6.7.orig/arch/m68knommu/platform/5307/timers.c 2004-06-15 22:20:26.000000000 -0700
+++ prof-2.6.7/arch/m68knommu/platform/5307/timers.c 2004-06-22 07:25:47.118999992 -0700
@@ -14,6 +14,7 @@
#include <linux/param.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/profile.h>
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/machdep.h>
@@ -111,16 +112,8 @@
/* Reset ColdFire timer2 */
mcf_proftp->ter = MCFTIMER_TER_CAP | MCFTIMER_TER_REF;

- if (!user_mode(regs)) {
- if (prof_buffer && current->pid) {
- extern int _stext;
- unsigned long ip = instruction_pointer(regs);
- ip -= (unsigned long) &_stext;
- ip >>= prof_shift;
- if (ip < prof_len)
- prof_buffer[ip]++;
- }
- }
+ if (!user_mode(regs) && profiling_on() && current->pid)
+ profile_tick(instruction_pointer(regs));
}

/***************************************************************************/
Index: prof-2.6.7/arch/m68knommu/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/m68knommu/kernel/time.c 2004-06-15 22:19:37.000000000 -0700
+++ prof-2.6.7/arch/m68knommu/kernel/time.c 2004-06-22 07:25:47.120999688 -0700
@@ -43,20 +43,8 @@

static inline void do_profile (unsigned long pc)
{
- if (prof_buffer && current->pid) {
- extern int _stext;
- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
- if (pc < prof_len)
- ++prof_buffer[pc];
- else
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- ++prof_buffer[prof_len-1];
- }
+ if (current->pid)
+ profile_tick(pc);
}

/*

2004-06-22 15:43:05

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [1/23] move proc_profile_operations to profile.c

Move proc_profile_operations into kernel/profile.c

Index: prof-2.6.7/fs/proc/proc_misc.c
===================================================================
--- prof-2.6.7.orig/fs/proc/proc_misc.c 2004-06-15 22:18:58.000000000 -0700
+++ prof-2.6.7/fs/proc/proc_misc.c 2004-06-22 07:25:43.255587320 -0700
@@ -555,70 +555,6 @@
return proc_calc_metrics(page, start, off, count, eof, len);
}

-/*
- * This function accesses profiling information. The returned data is
- * binary: the sampling step and the actual contents of the profile
- * buffer. Use of the program readprofile is recommended in order to
- * get meaningful info out of these data.
- */
-static ssize_t
-read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
-{
- unsigned long p = *ppos;
- ssize_t read;
- char * pnt;
- unsigned int sample_step = 1 << prof_shift;
-
- if (p >= (prof_len+1)*sizeof(unsigned int))
- return 0;
- if (count > (prof_len+1)*sizeof(unsigned int) - p)
- count = (prof_len+1)*sizeof(unsigned int) - p;
- read = 0;
-
- while (p < sizeof(unsigned int) && count > 0) {
- put_user(*((char *)(&sample_step)+p),buf);
- buf++; p++; count--; read++;
- }
- pnt = (char *)prof_buffer + p - sizeof(unsigned int);
- if (copy_to_user(buf,(void *)pnt,count))
- return -EFAULT;
- read += count;
- *ppos += read;
- return read;
-}
-
-/*
- * Writing to /proc/profile resets the counters
- *
- * Writing a 'profiling multiplier' value into it also re-sets the profiling
- * interrupt frequency, on architectures that support this.
- */
-static ssize_t write_profile(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
-#ifdef CONFIG_SMP
- extern int setup_profiling_timer (unsigned int multiplier);
-
- if (count == sizeof(int)) {
- unsigned int multiplier;
-
- if (copy_from_user(&multiplier, buf, sizeof(int)))
- return -EFAULT;
-
- if (setup_profiling_timer(multiplier))
- return -EINVAL;
- }
-#endif
-
- memset(prof_buffer, 0, prof_len * sizeof(*prof_buffer));
- return count;
-}
-
-static struct file_operations proc_profile_operations = {
- .read = read_profile,
- .write = write_profile,
-};
-
#ifdef CONFIG_MAGIC_SYSRQ
/*
* writing 'C' to /proc/sysrq-trigger is like sysrq-C
@@ -706,13 +642,7 @@
(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
}
#endif
- if (prof_on) {
- entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
- if (entry) {
- entry->proc_fops = &proc_profile_operations;
- entry->size = (1+prof_len) * sizeof(unsigned int);
- }
- }
+ create_proc_profile();
#ifdef CONFIG_MAGIC_SYSRQ
entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
if (entry)
Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-15 22:20:04.000000000 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:25:43.258586864 -0700
@@ -155,3 +155,82 @@

EXPORT_SYMBOL_GPL(profile_event_register);
EXPORT_SYMBOL_GPL(profile_event_unregister);
+
+#ifdef CONFIG_PROC_FS
+/*
+ * This function accesses profiling information. The returned data is
+ * binary: the sampling step and the actual contents of the profile
+ * buffer. Use of the program readprofile is recommended in order to
+ * get meaningful info out of these data.
+ */
+static ssize_t
+read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ ssize_t read;
+ char * pnt;
+ unsigned int sample_step = 1 << prof_shift;
+
+ if (p >= (prof_len+1)*sizeof(unsigned int))
+ return 0;
+ if (count > (prof_len+1)*sizeof(unsigned int) - p)
+ count = (prof_len+1)*sizeof(unsigned int) - p;
+ read = 0;
+
+ while (p < sizeof(unsigned int) && count > 0) {
+ put_user(*((char *)(&sample_step)+p),buf);
+ buf++; p++; count--; read++;
+ }
+ pnt = (char *)prof_buffer + p - sizeof(unsigned int);
+ if (copy_to_user(buf,(void *)pnt,count))
+ return -EFAULT;
+ read += count;
+ *ppos += read;
+ return read;
+}
+
+/*
+ * Writing to /proc/profile resets the counters
+ *
+ * Writing a 'profiling multiplier' value into it also re-sets the profiling
+ * interrupt frequency, on architectures that support this.
+ */
+static ssize_t write_profile(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+#ifdef CONFIG_SMP
+ extern int setup_profiling_timer (unsigned int multiplier);
+
+ if (count == sizeof(int)) {
+ unsigned int multiplier;
+
+ if (copy_from_user(&multiplier, buf, sizeof(int)))
+ return -EFAULT;
+
+ if (setup_profiling_timer(multiplier))
+ return -EINVAL;
+ }
+#endif
+
+ memset(prof_buffer, 0, prof_len * sizeof(*prof_buffer));
+ return count;
+}
+
+static struct file_operations proc_profile_operations = {
+ .read = read_profile,
+ .write = write_profile,
+};
+
+void create_proc_profile(void)
+{
+ struct proc_dir_entry *entry;
+
+ if (!prof_on)
+ return;
+ entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
+ if (!entry)
+ return;
+ entry->proc_fops = &proc_profile_operations;
+ entry->size = (1+prof_len) * sizeof(unsigned int);
+}
+#endif /* CONFIG_PROC_FS */
Index: prof-2.6.7/include/linux/profile.h
===================================================================
--- prof-2.6.7.orig/include/linux/profile.h 2004-06-15 22:19:22.000000000 -0700
+++ prof-2.6.7/include/linux/profile.h 2004-06-22 07:25:43.260586560 -0700
@@ -13,6 +13,7 @@

/* init basic kernel profiler */
void __init profile_init(void);
+void create_proc_profile(void);

extern unsigned int * prof_buffer;
extern unsigned long prof_len;

2004-06-22 15:30:13

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [7/23] superh profiling cleanups

Convert SuperH to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/sh/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/sh/kernel/time.c 2004-06-15 22:18:37.000000000 -0700
+++ prof-2.6.7/arch/sh/kernel/time.c 2004-06-22 07:25:48.921725936 -0700
@@ -24,6 +24,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
+#include <linux/profile.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
@@ -235,32 +236,16 @@

/* Profiling definitions */
extern unsigned long prof_cpu_mask;
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-extern char _stext;

static inline void sh_do_profile(unsigned long pc)
{
/* Don't profile cpu_idle.. */
- if (!prof_buffer || !current->pid)
+ if (!profiling_on() || !current->pid)
return;

if (pc >= 0xa0000000UL && pc < 0xc0000000UL)
pc -= 0x20000000;
-
- pc -= (unsigned long)&_stext;
- pc >>= prof_shift;
-
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (pc > prof_len - 1)
- pc = prof_len - 1;
-
- atomic_inc((atomic_t *)&prof_buffer[pc]);
+ profile_tick(pc);
}

/*

2004-06-22 15:30:13

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [9/23] m68k profiling cleanups

Convert m68k to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/m68k/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/m68k/kernel/time.c 2004-06-15 22:19:02.000000000 -0700
+++ prof-2.6.7/arch/m68k/kernel/time.c 2004-06-22 07:25:50.615468448 -0700
@@ -40,20 +40,8 @@

static inline void do_profile (unsigned long pc)
{
- if (prof_buffer && current->pid) {
- extern int _stext;
- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
- if (pc < prof_len)
- ++prof_buffer[pc];
- else
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- ++prof_buffer[prof_len-1];
- }
+ if (current->pid)
+ profile_tick(pc);
}

/*

2004-06-22 16:12:10

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [12/23] ppc64 profiling cleanups

Convert ppc64 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/ppc64/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/ppc64/kernel/time.c 2004-06-15 22:20:03.000000000 -0700
+++ prof-2.6.7/arch/ppc64/kernel/time.c 2004-06-22 07:25:53.213073552 -0700
@@ -112,36 +112,16 @@
*/
static inline void ppc64_do_profile(struct pt_regs *regs)
{
- unsigned long nip;
extern unsigned long prof_cpu_mask;

profile_hook(regs);

- if (user_mode(regs))
- return;
-
- if (!prof_buffer)
- return;
-
- nip = instruction_pointer(regs);
-
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
*/
- if (!((1<<smp_processor_id()) & prof_cpu_mask))
- return;
-
- nip -= (unsigned long)_stext;
- nip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds EIP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (nip > prof_len-1)
- nip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[nip]);
+ if (!user_mode(regs) && ((1<<smp_processor_id()) & prof_cpu_mask))
+ profile_tick(instruction_pointer(regs));
}

static __inline__ void timer_check_rtc(void)

2004-06-22 16:12:08

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [8/23] arm26 profiling cleanups

Convert arm26 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/arm26/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/arm26/kernel/time.c 2004-06-15 22:19:42.000000000 -0700
+++ prof-2.6.7/arch/arm26/kernel/time.c 2004-06-22 07:25:49.764597800 -0700
@@ -72,21 +72,8 @@
*/
static inline void do_profile(struct pt_regs *regs)
{
- if (!user_mode(regs) &&
- prof_buffer &&
- current->pid) {
- unsigned long pc = instruction_pointer(regs);
- extern int _stext;
-
- pc -= (unsigned long)&_stext;
-
- pc >>= prof_shift;
-
- if (pc >= prof_len)
- pc = prof_len - 1;
-
- prof_buffer[pc] += 1;
- }
+ if (!user_mode(regs) && profiling_on() && current->pid)
+ profile_tick(instruction_pointer(regs));
}

static unsigned long next_rtc_update;

2004-06-22 16:12:09

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [10/23] ia64 profiling cleanups

Convert ia64 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/ia64/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/ia64/kernel/time.c 2004-06-15 22:19:01.000000000 -0700
+++ prof-2.6.7/arch/ia64/kernel/time.c 2004-06-22 07:25:51.482336664 -0700
@@ -19,7 +19,6 @@
#include <linux/time.h>
#include <linux/interrupt.h>
#include <linux/efi.h>
-#include <linux/profile.h>
#include <linux/timex.h>

#include <asm/machvec.h>
@@ -203,7 +202,7 @@
if (user_mode(regs))
return;

- if (!prof_buffer)
+ if (!profiling_on())
return;

ip = instruction_pointer(regs);
@@ -217,19 +216,8 @@
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
*/
- if (!cpu_isset(smp_processor_id(), prof_cpu_mask))
- return;
-
- ip -= (unsigned long) &_stext;
- ip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds IP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (ip > prof_len-1)
- ip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[ip]);
+ if (cpu_isset(smp_processor_id(), prof_cpu_mask))
+ profile_tick(ip);
}

static irqreturn_t

2004-06-22 16:12:10

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [13/23] arm profiling cleanups

Convert ARM to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/arm/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/arm/kernel/time.c 2004-06-15 22:19:43.000000000 -0700
+++ prof-2.6.7/arch/arm/kernel/time.c 2004-06-22 07:25:54.061944504 -0700
@@ -85,24 +85,9 @@
*/
static inline void do_profile(struct pt_regs *regs)
{
-
profile_hook(regs);
-
- if (!user_mode(regs) &&
- prof_buffer &&
- current->pid) {
- unsigned long pc = instruction_pointer(regs);
- extern int _stext;
-
- pc -= (unsigned long)&_stext;
-
- pc >>= prof_shift;
-
- if (pc >= prof_len)
- pc = prof_len - 1;
-
- prof_buffer[pc] += 1;
- }
+ if (!user_mode(regs) && current->pid)
+ profile_tick(instruction_pointer(regs));
}

static unsigned long next_rtc_update;

2004-06-22 16:26:00

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [15/23] h8300 profiling cleanups

Convert h8300 and v850 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/h8300/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/h8300/kernel/time.c 2004-06-15 22:19:10.000000000 -0700
+++ prof-2.6.7/arch/h8300/kernel/time.c 2004-06-22 07:25:55.750687776 -0700
@@ -38,20 +38,8 @@

static inline void do_profile (unsigned long pc)
{
- if (prof_buffer && current->pid) {
- extern int _stext;
- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
- if (pc < prof_len)
- ++prof_buffer[pc];
- else
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- ++prof_buffer[prof_len-1];
- }
+ if (current->pid)
+ profile_tick(pc);
}

/*
Index: prof-2.6.7/arch/v850/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/v850/kernel/time.c 2004-06-15 22:19:37.000000000 -0700
+++ prof-2.6.7/arch/v850/kernel/time.c 2004-06-22 07:25:55.752687472 -0700
@@ -42,20 +42,8 @@

static inline void do_profile (unsigned long pc)
{
- if (prof_buffer && current->pid) {
- extern int _stext;
- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
- if (pc < prof_len)
- ++prof_buffer[pc];
- else
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- ++prof_buffer[prof_len-1];
- }
+ if (current->pid)
+ profile_tick(pc);
}

/*

2004-06-22 16:26:02

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [14/23] parisc profiling cleanups

Convert PA-RISC to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/parisc/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/parisc/kernel/time.c 2004-06-15 22:20:27.000000000 -0700
+++ prof-2.6.7/arch/parisc/kernel/time.c 2004-06-22 07:25:54.908815760 -0700
@@ -54,16 +54,7 @@
#if 0
extern unsigned long prof_cpu_mask;
#endif
- extern char _stext;
-
profile_hook(regs);
-
- if (user_mode(regs))
- return;
-
- if (!prof_buffer)
- return;
-
#if 0
/* FIXME: when we have irq affinity to cpu, we need to
* only look at the cpus specified in this mask
@@ -72,17 +63,8 @@
if (!((1 << smp_processor_id()) & prof_cpu_mask))
return;
#endif
-
- pc -= (unsigned long) &_stext;
- pc >>= prof_shift;
- /*
- * Don't ignore out-of-bounds PC values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (pc > prof_len - 1)
- pc = prof_len - 1;
- atomic_inc((atomic_t *)&prof_buffer[pc]);
+ if (!user_mode(regs))
+ profile_tick(pc);
}

irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)

2004-06-22 16:26:01

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [16/23] s390 profiling cleanups

Convert S/390 to use profiling_on() and profile_tick().

Index: prof-2.6.7/arch/s390/kernel/time.c
===================================================================
--- prof-2.6.7.orig/arch/s390/kernel/time.c 2004-06-15 22:18:38.000000000 -0700
+++ prof-2.6.7/arch/s390/kernel/time.c 2004-06-22 07:25:56.691544744 -0700
@@ -190,36 +190,16 @@
*/
static inline void s390_do_profile(struct pt_regs * regs)
{
- unsigned long eip;
extern cpumask_t prof_cpu_mask;

profile_hook(regs);

- if (user_mode(regs))
- return;
-
- if (!prof_buffer)
- return;
-
- eip = instruction_pointer(regs);
-
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
*/
- if (!cpu_isset(smp_processor_id(), prof_cpu_mask))
- return;
-
- eip -= (unsigned long) &_stext;
- eip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds EIP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (eip > prof_len-1)
- eip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[eip]);
+ if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
+ profile_tick(instruction_pointer(regs));
}
#else
#define s390_do_profile(regs) do { ; } while(0)

2004-06-22 16:32:50

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [17/23] x86-64 profiling cleanups

Convert x86-64 to use profiling_on() and profile_tick().

Index: prof-2.6.7/include/asm-x86_64/hw_irq.h
===================================================================
--- prof-2.6.7.orig/include/asm-x86_64/hw_irq.h 2004-06-15 22:20:04.000000000 -0700
+++ prof-2.6.7/include/asm-x86_64/hw_irq.h 2004-06-22 07:25:57.557413112 -0700
@@ -130,36 +130,15 @@

static inline void x86_do_profile (struct pt_regs *regs)
{
- unsigned long rip;
extern unsigned long prof_cpu_mask;
- extern char _stext[];

- profile_hook(regs);
-
- if (user_mode(regs))
- return;
- if (!prof_buffer)
- return;
-
- rip = regs->rip;
-
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
*/
- if (!((1<<smp_processor_id()) & prof_cpu_mask))
- return;
-
- rip -= (unsigned long) &_stext;
- rip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds EIP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (rip > prof_len-1)
- rip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[rip]);
+ profile_hook(regs);
+ if (!user_mode(regs) && ((1<<smp_processor_id()) & prof_cpu_mask))
+ profile_tick(regs->rip);
}

#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP)

2004-06-22 16:32:51

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [18/23] i386 profiling cleanups

Convert i386 to use profiling_on() and profile_tick().

Index: prof-2.6.7/include/asm-i386/hw_irq.h
===================================================================
--- prof-2.6.7.orig/include/asm-i386/hw_irq.h 2004-06-15 22:20:25.000000000 -0700
+++ prof-2.6.7/include/asm-i386/hw_irq.h 2004-06-22 07:25:58.418282240 -0700
@@ -16,7 +16,6 @@
#include <linux/profile.h>
#include <asm/atomic.h>
#include <asm/irq.h>
-#include <asm/sections.h>

/*
* Various low-level irq details needed by irq.c, process.c,
@@ -76,36 +75,15 @@
*/
static inline void x86_do_profile(struct pt_regs * regs)
{
- unsigned long eip;
extern unsigned long prof_cpu_mask;

- profile_hook(regs);
-
- if (user_mode(regs))
- return;
-
- if (!prof_buffer)
- return;
-
- eip = regs->eip;
-
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
*/
- if (!((1<<smp_processor_id()) & prof_cpu_mask))
- return;
-
- eip -= (unsigned long)_stext;
- eip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds EIP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (eip > prof_len-1)
- eip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[eip]);
+ profile_hook(regs);
+ if (!user_mode(regs) && ((1<<smp_processor_id()) & prof_cpu_mask))
+ profile_tick(regs->eip);
}

#if defined(CONFIG_X86_IO_APIC)

2004-06-22 16:39:33

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [19/23] remove public decls of profile.c internal state

Privatize prof_buffer, prof_len, and prof_shift.

Index: prof-2.6.7/include/linux/profile.h
===================================================================
--- prof-2.6.7.orig/include/linux/profile.h 2004-06-22 07:25:44.336423008 -0700
+++ prof-2.6.7/include/linux/profile.h 2004-06-22 07:25:59.265153496 -0700
@@ -17,12 +17,6 @@
void profile_tick(unsigned long);
int profiling_on(void);

-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-extern int prof_on;
-
-
enum profile_type {
EXIT_TASK,
EXIT_MMAP,
Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 07:25:44.339422552 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:25:59.267153192 -0700
@@ -10,10 +10,9 @@
#include <linux/mm.h>
#include <asm/sections.h>

-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-int prof_on;
+static unsigned int *prof_buffer;
+static unsigned long prof_len, prof_shift;
+static int prof_on;

int __init profile_setup(char * str)
{

2004-06-22 16:39:37

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [21/23] use atomic_t for prof_buffer

Convert prof_buffer to an array of atomic_t's.

Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 07:26:00.201011224 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:26:01.063880048 -0700
@@ -10,7 +10,7 @@
#include <linux/mm.h>
#include <asm/sections.h>

-static unsigned int *prof_buffer;
+static atomic_t *prof_buffer;
static unsigned long prof_len, prof_shift;
static int prof_on;

@@ -33,7 +33,7 @@

/* only text is profiled */
prof_len = (_etext - _stext) >> prof_shift;
- prof_buffer = alloc_bootmem(sizeof(unsigned int)*prof_len);
+ prof_buffer = alloc_bootmem(sizeof(atomic_t)*prof_len);
}

int profiling_on(void)
@@ -43,14 +43,12 @@

void profile_tick(unsigned long pc)
{
- atomic_t *count;
unsigned long idx;

if (!prof_on)
return;
idx = (pc - (unsigned long)_stext) >> prof_shift;
- count = (atomic_t *)&prof_buffer[min(idx, prof_len - 1)];
- atomic_inc(count);
+ atomic_inc(&prof_buffer[min(idx, prof_len - 1)]);
}

/* Profile event notifications */
@@ -182,17 +180,17 @@
char * pnt;
unsigned int sample_step = 1 << prof_shift;

- if (p >= (prof_len+1)*sizeof(unsigned int))
+ if (p >= (prof_len+1)*sizeof(atomic_t))
return 0;
- if (count > (prof_len+1)*sizeof(unsigned int) - p)
- count = (prof_len+1)*sizeof(unsigned int) - p;
+ if (count > (prof_len+1)*sizeof(atomic_t) - p)
+ count = (prof_len+1)*sizeof(atomic_t) - p;
read = 0;

- while (p < sizeof(unsigned int) && count > 0) {
+ while (p < sizeof(atomic_t) && count > 0) {
put_user(*((char *)(&sample_step)+p),buf);
buf++; p++; count--; read++;
}
- pnt = (char *)prof_buffer + p - sizeof(unsigned int);
+ pnt = (char *)prof_buffer + p - sizeof(atomic_t);
if (copy_to_user(buf,(void *)pnt,count))
return -EFAULT;
read += count;
@@ -223,7 +221,7 @@
}
#endif

- memset(prof_buffer, 0, prof_len * sizeof(*prof_buffer));
+ memset(prof_buffer, 0, prof_len*sizeof(atomic_t));
return count;
}

@@ -242,6 +240,6 @@
if (!entry)
return;
entry->proc_fops = &proc_profile_operations;
- entry->size = (1+prof_len) * sizeof(unsigned int);
+ entry->size = (1+prof_len) * sizeof(atomic_t);
}
#endif /* CONFIG_PROC_FS */

2004-06-22 16:39:34

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [20/23] clean up profile_init() not to oversize buffer

Don't overestimate the length of prof_buffer in profile_init().

Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 07:25:59.267153192 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:26:00.201011224 -0700
@@ -28,17 +28,12 @@

void __init profile_init(void)
{
- unsigned int size;
-
if (!prof_on)
return;

/* only text is profiled */
- prof_len = _etext - _stext;
- prof_len >>= prof_shift;
-
- size = prof_len * sizeof(unsigned int) + PAGE_SIZE - 1;
- prof_buffer = (unsigned int *) alloc_bootmem(size);
+ prof_len = (_etext - _stext) >> prof_shift;
+ prof_buffer = alloc_bootmem(sizeof(unsigned int)*prof_len);
}

int profiling_on(void)

2004-06-22 16:50:25

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [22/23] put 1 << prof_shift at prof_buffer[0]

Change the profile buffer format so that prof_buffer[0] contains the
stepsize.

Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 07:26:01.063880048 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:26:01.925749024 -0700
@@ -32,8 +32,9 @@
return;

/* only text is profiled */
- prof_len = (_etext - _stext) >> prof_shift;
+ prof_len = ((unsigned long)(_etext - _stext) + 1) >> prof_shift;
prof_buffer = alloc_bootmem(sizeof(atomic_t)*prof_len);
+ atomic_set(prof_buffer, 1 << prof_shift);
}

int profiling_on(void)
@@ -48,7 +49,7 @@
if (!prof_on)
return;
idx = (pc - (unsigned long)_stext) >> prof_shift;
- atomic_inc(&prof_buffer[min(idx, prof_len - 1)]);
+ atomic_inc(&prof_buffer[min(idx + 1, prof_len - 1)]);
}

/* Profile event notifications */
@@ -176,26 +177,14 @@
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
- char * pnt;
- unsigned int sample_step = 1 << prof_shift;

- if (p >= (prof_len+1)*sizeof(atomic_t))
+ if (p >= sizeof(atomic_t)*prof_len)
return 0;
- if (count > (prof_len+1)*sizeof(atomic_t) - p)
- count = (prof_len+1)*sizeof(atomic_t) - p;
- read = 0;
-
- while (p < sizeof(atomic_t) && count > 0) {
- put_user(*((char *)(&sample_step)+p),buf);
- buf++; p++; count--; read++;
- }
- pnt = (char *)prof_buffer + p - sizeof(atomic_t);
- if (copy_to_user(buf,(void *)pnt,count))
+ count = min(prof_len*sizeof(atomic_t) - p, count);
+ if (copy_to_user(buf, (char *)prof_buffer + p, count))
return -EFAULT;
- read += count;
- *ppos += read;
- return read;
+ *ppos += count;
+ return count;
}

/*
@@ -240,6 +229,6 @@
if (!entry)
return;
entry->proc_fops = &proc_profile_operations;
- entry->size = (1+prof_len) * sizeof(atomic_t);
+ entry->size = sizeof(atomic_t)*prof_len;
}
#endif /* CONFIG_PROC_FS */

2004-06-22 16:50:26

by William Lee Irwin III

[permalink] [raw]
Subject: [profile]: [23/23] add mmap() support for /proc/profile

Allow mmap() on /proc/profile.

Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 07:26:01.925749024 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 07:26:02.811614352 -0700
@@ -8,10 +8,11 @@
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <asm/sections.h>

static atomic_t *prof_buffer;
-static unsigned long prof_len, prof_shift;
+static unsigned long prof_len, prof_shift, prof_pages;
static int prof_on;

int __init profile_setup(char * str)
@@ -33,7 +34,8 @@

/* only text is profiled */
prof_len = ((unsigned long)(_etext - _stext) + 1) >> prof_shift;
- prof_buffer = alloc_bootmem(sizeof(atomic_t)*prof_len);
+ prof_buffer = alloc_bootmem_pages(sizeof(atomic_t)*prof_len);
+ prof_pages = PAGE_ALIGN(prof_len*sizeof(atomic_t))/PAGE_SIZE;
atomic_set(prof_buffer, 1 << prof_shift);
}

@@ -167,6 +169,7 @@
EXPORT_SYMBOL_GPL(profile_event_unregister);

#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
/*
* This function accesses profiling information. The returned data is
* binary: the sampling step and the actual contents of the profile
@@ -214,9 +217,37 @@
return count;
}

+static struct page *profile_nopage(struct vm_area_struct *vma,
+ unsigned long vaddr, int *type)
+{
+ void *kvaddr;
+
+ if (linear_page_index(vma, vaddr) > prof_pages) {
+ *type = VM_FAULT_SIGBUS;
+ return NOPAGE_SIGBUS;
+ }
+ kvaddr = (void *)(PAGE_SIZE*vma->vm_pgoff + vaddr - vma->vm_start);
+ *type = VM_FAULT_MINOR;
+ return virt_to_page(kvaddr);
+}
+
+static struct vm_operations_struct profile_vm_ops = {
+ .nopage = profile_nopage,
+};
+
+static int mmap_profile(struct file *file, struct vm_area_struct *vma)
+{
+ if (vma->vm_pgoff + vma_pages(vma) > prof_pages)
+ return -ENODEV;
+ vma->vm_flags |= VM_RESERVED|VM_IO;
+ vma->vm_ops = &profile_vm_ops;
+ return 0;
+}
+
static struct file_operations proc_profile_operations = {
.read = read_profile,
.write = write_profile,
+ .mmap = mmap_profile,
};

void create_proc_profile(void)

2004-06-22 17:13:05

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [profile]: [23/23] add mmap() support for /proc/profile

On Tue, Jun 22, 2004 at 08:18:10AM -0700, William Lee Irwin III wrote:
> Allow mmap() on /proc/profile.

Stale code. =(

The working version is:

Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 08:28:12.237655416 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 08:28:13.102523936 -0700
@@ -8,10 +8,11 @@
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <asm/sections.h>

static atomic_t *prof_buffer;
-static unsigned long prof_len, prof_shift;
+static unsigned long prof_len, prof_shift, prof_pages;
static int prof_on;

int __init profile_setup(char * str)
@@ -33,7 +34,8 @@

/* only text is profiled */
prof_len = ((unsigned long)(_etext - _stext) + 1) >> prof_shift;
- prof_buffer = alloc_bootmem(sizeof(atomic_t)*prof_len);
+ prof_buffer = alloc_bootmem_pages(sizeof(atomic_t)*prof_len);
+ prof_pages = PAGE_ALIGN(prof_len*sizeof(atomic_t))/PAGE_SIZE;
atomic_set(prof_buffer, 1 << prof_shift);
}

@@ -167,6 +169,7 @@
EXPORT_SYMBOL_GPL(profile_event_unregister);

#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
/*
* This function accesses profiling information. The returned data is
* binary: the sampling step and the actual contents of the profile
@@ -180,7 +183,7 @@

if (p >= sizeof(atomic_t)*prof_len)
return 0;
- count = min(prof_len*sizeof(atomic_t) - p, count);
+ count = min_t(size_t, prof_len*sizeof(atomic_t) - p, count);
if (copy_to_user(buf, (char *)prof_buffer + p, count))
return -EFAULT;
*ppos += count;
@@ -210,13 +213,44 @@
}
#endif

- memset(prof_buffer, 0, prof_len*sizeof(atomic_t));
+ memset(&prof_buffer[1], 0, (prof_len - 1)*sizeof(atomic_t));
return count;
}

+static int mmap_profile(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long pfn, vaddr, base_pfn = __pa(prof_buffer)/PAGE_SIZE;
+ if (vma->vm_pgoff + vma_pages(vma) > prof_pages)
+ return -ENODEV;
+ vma->vm_flags |= VM_RESERVED|VM_IO;
+ for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+ pgd_t *pgd = pgd_offset(vma->vm_mm, vaddr);
+ pmd_t *pmd;
+ pte_t *pte, pte_val;
+ spin_lock(&vma->vm_mm->page_table_lock);
+ pmd = pmd_alloc(vma->vm_mm, pgd, vaddr);
+ if (!pmd)
+ goto enomem;
+ pte = pte_alloc_map(vma->vm_mm, pmd, vaddr);
+ if (!pte)
+ goto enomem;
+ pfn = base_pfn + linear_page_index(vma, vaddr);
+ pte_val = pfn_pte(pfn, vma->vm_page_prot);
+ set_pte(pte, pte_val);
+ update_mmu_cache(vma, vaddr, pte_val);
+ pte_unmap(pte);
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ }
+ return 0;
+enomem:
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ return -ENOMEM;
+}
+
static struct file_operations proc_profile_operations = {
.read = read_profile,
.write = write_profile,
+ .mmap = mmap_profile,
};

void create_proc_profile(void)

2004-06-22 17:18:06

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [profile]: [22/23] put 1 << prof_shift at prof_buffer[0]

On Tue, Jun 22, 2004 at 08:18:10AM -0700, William Lee Irwin III wrote:
> Change the profile buffer format so that prof_buffer[0] contains the
> stepsize.

And this actually needs to be the following, to fix up an off-by-one
and a hunk that migrated to the wrong patch of the series (to be
followed by an mmap() patch that actually applies atop this):


Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 08:28:11.401782488 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 10:06:59.034645320 -0700
@@ -32,8 +32,9 @@
return;

/* only text is profiled */
- prof_len = (_etext - _stext) >> prof_shift;
+ prof_len = ((unsigned long)(_etext - _stext) >> prof_shift) + 1;
prof_buffer = alloc_bootmem(sizeof(atomic_t)*prof_len);
+ atomic_set(prof_buffer, 1 << prof_shift);
}

int profiling_on(void)
@@ -48,7 +49,7 @@
if (!prof_on)
return;
idx = (pc - (unsigned long)_stext) >> prof_shift;
- atomic_inc(&prof_buffer[min(idx, prof_len - 1)]);
+ atomic_inc(&prof_buffer[min(idx + 1, prof_len - 1)]);
}

/* Profile event notifications */
@@ -176,26 +177,14 @@
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
- char * pnt;
- unsigned int sample_step = 1 << prof_shift;

- if (p >= (prof_len+1)*sizeof(atomic_t))
+ if (p >= sizeof(atomic_t)*prof_len)
return 0;
- if (count > (prof_len+1)*sizeof(atomic_t) - p)
- count = (prof_len+1)*sizeof(atomic_t) - p;
- read = 0;
-
- while (p < sizeof(atomic_t) && count > 0) {
- put_user(*((char *)(&sample_step)+p),buf);
- buf++; p++; count--; read++;
- }
- pnt = (char *)prof_buffer + p - sizeof(atomic_t);
- if (copy_to_user(buf,(void *)pnt,count))
+ count = min(prof_len*sizeof(atomic_t) - p, count);
+ if (copy_to_user(buf, (char *)prof_buffer + p, count))
return -EFAULT;
- read += count;
- *ppos += read;
- return read;
+ *ppos += count;
+ return count;
}

/*
@@ -221,7 +210,7 @@
}
#endif

- memset(prof_buffer, 0, prof_len*sizeof(atomic_t));
+ memset(&prof_buffer[1], 0, (prof_len-1)*sizeof(atomic_t));
return count;
}

@@ -240,6 +229,6 @@
if (!entry)
return;
entry->proc_fops = &proc_profile_operations;
- entry->size = (1+prof_len) * sizeof(atomic_t);
+ entry->size = sizeof(atomic_t)*prof_len;
}
#endif /* CONFIG_PROC_FS */

2004-06-22 17:18:19

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [profile]: [22/23] put 1 << prof_shift at prof_buffer[0]

On Tue, Jun 22, 2004 at 10:10:57AM -0700, William Lee Irwin III wrote:
> And this actually needs to be the following, to fix up an off-by-one
> and a hunk that migrated to the wrong patch of the series (to be
> followed by an mmap() patch that actually applies atop this):

And the mmap() patch that applies atop this is:


Index: prof-2.6.7/kernel/profile.c
===================================================================
--- prof-2.6.7.orig/kernel/profile.c 2004-06-22 10:06:59.034645320 -0700
+++ prof-2.6.7/kernel/profile.c 2004-06-22 10:08:31.884529992 -0700
@@ -8,10 +8,11 @@
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <asm/sections.h>

static atomic_t *prof_buffer;
-static unsigned long prof_len, prof_shift;
+static unsigned long prof_len, prof_shift, prof_pages;
static int prof_on;

int __init profile_setup(char * str)
@@ -33,7 +34,8 @@

/* only text is profiled */
prof_len = ((unsigned long)(_etext - _stext) >> prof_shift) + 1;
- prof_buffer = alloc_bootmem(sizeof(atomic_t)*prof_len);
+ prof_buffer = alloc_bootmem_pages(sizeof(atomic_t)*prof_len);
+ prof_pages = PAGE_ALIGN(prof_len*sizeof(atomic_t))/PAGE_SIZE;
atomic_set(prof_buffer, 1 << prof_shift);
}

@@ -167,6 +169,7 @@
EXPORT_SYMBOL_GPL(profile_event_unregister);

#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
/*
* This function accesses profiling information. The returned data is
* binary: the sampling step and the actual contents of the profile
@@ -180,7 +183,7 @@

if (p >= sizeof(atomic_t)*prof_len)
return 0;
- count = min(prof_len*sizeof(atomic_t) - p, count);
+ count = min_t(size_t, prof_len*sizeof(atomic_t) - p, count);
if (copy_to_user(buf, (char *)prof_buffer + p, count))
return -EFAULT;
*ppos += count;
@@ -214,9 +217,40 @@
return count;
}

+static int mmap_profile(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long pfn, vaddr, base_pfn = __pa(prof_buffer)/PAGE_SIZE;
+ if (vma->vm_pgoff + vma_pages(vma) > prof_pages)
+ return -ENODEV;
+ vma->vm_flags |= VM_RESERVED|VM_IO;
+ for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+ pgd_t *pgd = pgd_offset(vma->vm_mm, vaddr);
+ pmd_t *pmd;
+ pte_t *pte, pte_val;
+ spin_lock(&vma->vm_mm->page_table_lock);
+ pmd = pmd_alloc(vma->vm_mm, pgd, vaddr);
+ if (!pmd)
+ goto enomem;
+ pte = pte_alloc_map(vma->vm_mm, pmd, vaddr);
+ if (!pte)
+ goto enomem;
+ pfn = base_pfn + linear_page_index(vma, vaddr);
+ pte_val = pfn_pte(pfn, vma->vm_page_prot);
+ set_pte(pte, pte_val);
+ update_mmu_cache(vma, vaddr, pte_val);
+ pte_unmap(pte);
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ }
+ return 0;
+enomem:
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ return -ENOMEM;
+}
+
static struct file_operations proc_profile_operations = {
.read = read_profile,
.write = write_profile,
+ .mmap = mmap_profile,
};

void create_proc_profile(void)

2004-06-22 20:24:03

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [profile]: [21/23] use atomic_t for prof_buffer

On Tue, 22 Jun 2004 08:17:55 -0700 William Lee Irwin III <[email protected]> wrote:
>> Convert prof_buffer to an array of atomic_t's.

On Tue, Jun 22, 2004 at 01:01:16PM -0700, David S. Miller wrote:
> Part of a data type exported to userspace, is it not?
> Thus, is it really valid to change it like this?

They're copied raw to userspace now and cast to atomic_t for all
modifications except for sparc32, arm, h8300, m68k, and m68knommu,
where it's still equivalent (the atomic operations just do normal
arithmetic under hashed locks or with ll/sc or other easily zennable
asm), so there is no change. Or did I miss an arch?


-- wli

2004-06-22 21:15:48

by David Miller

[permalink] [raw]
Subject: Re: [profile]: [21/23] use atomic_t for prof_buffer

On Tue, 22 Jun 2004 13:16:54 -0700
William Lee Irwin III <[email protected]> wrote:

> On Tue, 22 Jun 2004 08:17:55 -0700 William Lee Irwin III <[email protected]> wrote:
> >> Convert prof_buffer to an array of atomic_t's.
>
> On Tue, Jun 22, 2004 at 01:01:16PM -0700, David S. Miller wrote:
> > Part of a data type exported to userspace, is it not?
> > Thus, is it really valid to change it like this?
>
> They're copied raw to userspace now and casted to atomic_t for all
> modifications except for sparc32, arm, h8300, m68k, and m68knommu,
> where it's still equivalent (the atomic operations just do normal
> arithmetic under hashed locks or with ll/sc or other easily zennable
> asm), so there is no change. Or did I miss an arch?

That sets my mind at ease, looks good.

2004-06-22 20:12:03

by David Miller

[permalink] [raw]
Subject: Re: [profile]: [21/23] use atomic_t for prof_buffer

On Tue, 22 Jun 2004 08:17:55 -0700
William Lee Irwin III <[email protected]> wrote:

> Convert prof_buffer to an array of atomic_t's.

Part of a data type exported to userspace, is it not?
Thus, is it really valid to change it like this?

2004-06-23 01:48:45

by Anton Blanchard

[permalink] [raw]
Subject: Re: [profile]: [0/23] mmap() support for /proc/profile


> I was trying to profile a mostly-idle workload to get an idea of what
> area of the kernel things were diving into and falling asleep in during
> an OAST run. Without these patches, kerneltop et al showed heavy /proc/
> activity along with copy_to_user() at the top of the profiles.

Interesting stuff. FYI we did some analysis of the hottest addresses in
the kernel and profile_lock featured very high up:

void profile_hook(struct pt_regs * regs)
{
read_lock(&profile_lock);
notifier_call_chain(&profile_listeners, 0, regs);
read_unlock(&profile_lock);
}

That's 2 atomic operations to the same cacheline per timer interrupt per
cpu. Considering how rarely timer-based profiling is used, perhaps RCU
or even just a profiling_enabled sysctl flag would help here. I'd prefer
not to compile it out in distro kernels if possible; it's a very useful
feature when required.

In the meantime, how about this quick fix?

Anton

--

Cacheline-align profile_lock; analysis shows it to be one of the hottest
memory locations on large SMP boxes.

Signed-off-by: Anton Blanchard <[email protected]>

===== kernel/profile.c 1.5 vs edited =====
--- 1.5/kernel/profile.c Wed Jul 16 18:09:04 2003
+++ edited/kernel/profile.c Wed Jun 23 09:13:28 2004
@@ -8,6 +8,7 @@
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/mm.h>
+#include <linux/cache.h>
#include <asm/sections.h>

unsigned int * prof_buffer;
@@ -119,7 +120,7 @@
}

static struct notifier_block * profile_listeners;
-static rwlock_t profile_lock = RW_LOCK_UNLOCKED;
+static rwlock_t profile_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;

int register_profile_notifier(struct notifier_block * nb)
{

2004-06-23 02:00:23

by David Miller

[permalink] [raw]
Subject: Re: [profile]: [0/23] mmap() support for /proc/profile

On Wed, 23 Jun 2004 09:16:46 +1000
Anton Blanchard <[email protected]> wrote:

> Considering how rarely timer based profiling is used, perhaps RCU
> or even just a profiling_enabled sysctl flag would help here.

RCU seems very appropriate for this.

2004-06-23 02:05:17

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [profile]: [0/23] mmap() support for /proc/profile

On Wed, Jun 23, 2004 at 09:16:46AM +1000, Anton Blanchard wrote:
> Interesting stuff. FYI we did some analysis of the hottest addresses in
> the kernel and profile_lock featured very high up:
> void profile_hook(struct pt_regs * regs)
> {
> read_lock(&profile_lock);
> notifier_call_chain(&profile_listeners, 0, regs);
> read_unlock(&profile_lock);
> }
> Thats 2 atomic operations to the same cacheline per timer interrupt per
> cpu. Considering how rarely timer based profiling is used, perhaps RCU
> or even just a profiling_enabled sysctl flag would help here. Id prefer
> not to compile it out in distro kernels if possible, its a very useful
> feature when required.
> In the mean time, how about this quick fix?

Well, this is a little different. I was more concerned about the
"Heisenberg effect" that the in-kernel copies to fetch profiling data
had upon the data fetched. i.e. instead of idle time and the stuff I
was looking for, I saw copy_to_user() and all kinds of vfs and /proc/
crap instead, which blew what I was looking for completely out of the
top 20. The profiling I did was on UP, which was done in part to
eliminate lock contention as the cause of the phenomena I had observed.

Also, Randy said I should mention that my kerneltop-like thingie is
about 5 times faster than (well, uses about 20% of the cpu time of) the
read()-based kerneltop. Which is all well and good, but the reason I
actually needed this was to get rid of the Heisenberg problem.


-- wli

2004-06-23 03:00:43

by Anton Blanchard

[permalink] [raw]
Subject: Re: [profile]: [0/23] mmap() support for /proc/profile


Hi,

> Well, this is a little different. I was more concerned about the
> "Heisenberg effect" that the in-kernel copies to fetch profiling data
> had upon the data fetched. i.e. instead of idle time and the stuff I
> was looking for, I saw copy_to_user() and all kinds of vfs and /proc/
> crap instead, which blew what I was looking for completely out of the
> top 20. The profiling I did was on UP, which was done in part to
> eliminate lock contention as the cause of the phenomena I had observed.

Sure, I was just pointing out another area in our profiling code that
warrants attention :)

Anton

2004-06-29 18:59:01

by David Mosberger

[permalink] [raw]
Subject: Re: [profile]: [10/23] ia64 profiling cleanups

>>>>> On Tue, 22 Jun 2004 08:17:09 -0700, William Lee Irwin III <[email protected]> said:

William> Convert ia64 to use profiling_on() and profile_tick().

This patch looks fine to me (well, it took me a while to figure out
that you hid profiling_on() and profile_tick() inside the ppc32 diff;
bad boy! ;-).

--david

2004-06-29 19:30:18

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [profile]: [10/23] ia64 profiling cleanups

On Tue, 22 Jun 2004 08:17:09 -0700, William Lee Irwin III <[email protected]> said:
William> Convert ia64 to use profiling_on() and profile_tick().

On Tue, Jun 29, 2004 at 11:56:29AM -0700, David Mosberger wrote:
> This patch looks fine to me (well, it took me a while to figure out
> that you hid profiling_on() and profile_tick() inside the ppc32 diff;
> bad boy! ;-).

Ah, yes, I folded the implementation of the API into the conversion
patch for the first user converted. Since this obfuscates the series
when it's later introduced in a large number of places, I'll change that.

Thanks.


-- wli