2009-11-13 04:23:11

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 0/4] perf bench: Add new subsystem "mem" and new suite "memcpy"

This patch series adds a new subsystem "mem"
and a new suite "memcpy" for it.

I have not added the Nehalem-optimized memcpy() by Ling Ma yet.
The original memcpy() consists of many separate inline assembly
statements, and its labels with single-digit names conflicted
with other parts of the assembled code.
So I'll rewrite it for readability and post
the patch to add Ling's memcpy() later.

This is the first version. Could you review it?

Hitoshi Mitake (4):
perf bench: Add new subsystem and new suite, bench/mem-memcpy.c
perf bench: Modify bench/bench.h for new prototype:
bench_mem_memcpy()
perf bench: Modify builtin-bench.c for new subsystem "mem"
perf bench: Modify Makefile to build new subsystem "mem"

tools/perf/Makefile | 3 +-
tools/perf/bench/bench.h | 1 +
tools/perf/bench/mem-memcpy.c | 258 +++++++++++++++++++++++++++++++++++++++++
tools/perf/builtin-bench.c | 14 ++-
4 files changed, 274 insertions(+), 2 deletions(-)
create mode 100644 tools/perf/bench/mem-memcpy.c


2009-11-13 04:23:45

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c

This patch adds bench/mem-memcpy.c.
This file provides a new subsystem "mem" for evaluating memory performance,
and a new suite "memcpy" for measuring the performance of memcpy(3)-like functions.

bench/mem-memcpy.c will be a starting point for comparing
different memcpy() algorithms on the same CPU,
or the same memcpy() on different CPUs.

The only memcpy() currently supported is the one provided by glibc.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>

| % perf bench mem memcpy -l 1GB -c # Measure in clock
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f53f8c25010 to 0x7f5438c26010 ...
|
| 2.156751 Clock/Byte
| % perf bench mem memcpy -l 1GB # Measure in time(default)
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f2cffefb010 to 0x7f2d3fefc010 ...
|
| 1.415502 GB/Sec
---
tools/perf/bench/mem-memcpy.c | 258 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 258 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/bench/mem-memcpy.c

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..dd9cbc6
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,1 +1,258 @@
+/*
+ *
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Based on memcpy.c by Ma Ling <[email protected]>
+ * http://marc.info/?l=linux-kernel&m=125792321123782&w=2
+ * This memcpy.c is posted to LKML by Ma Ling for comparing
+ * two ways of memory copying.
+ * The thread is started from
+ * http://marc.info/?l=linux-kernel&m=125750023424093&w=2
+ *
+ * Ported to perf by Hitoshi Mitake <[email protected]>
+ *
+ */
+
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <assert.h>
+
+#define K 1024
+static char *length_str = (char *)"1MB";
+static char *routine = (char *)"default";
+static int use_clockcycle = 0;
+
+typedef unsigned long int clockcycle_t;
+
+#ifdef x86_64
+
+static inline clockcycle_t get_clock(void)
+{
+ long int ret;
+
+ asm("rdtsc; shlq $32, %%rdx;"
+ "orq %%rdx, %%rax;"
+ "movq %%rax, %0;"
+ : "=r" (ret));
+
+ return ret;
+}
+
+#endif /* x86_64 */
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+#ifdef x86_64
+ /*
+ * TODO: This should be expanded to any architecuture
+ * perf supports
+ */
+ OPT_BOOLEAN('c', "clockcycle", &use_clockcycle,
+ "Use CPU's clock cycle for measurement"),
+#endif /* x86_64 */
+ OPT_END()
+};
+
+struct routine {
+ const char *name;
+ void * (*fn)(void *dst, const void *src, size_t len);
+ const char *desc;
+};
+
+struct routine routines[] = {
+ { "default",
+ memcpy,
+ "Default memcpy() provided by glibc" },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static size_t str2length(char *_str)
+{
+ int i, unit = 1;
+ char *str;
+ size_t length = -1;
+
+ str = calloc(strlen(_str) + 1, sizeof(char));
+ assert(str);
+ strcpy(str, _str);
+
+ if (!isdigit(str[0]))
+ goto err;
+
+ for (i = 1; i < (int)strlen(str); i++) {
+ switch ((int)str[i]) {
+ case 'B':
+ case 'b':
+ str[i] = '\0';
+ break;
+ case 'K':
+ case 'k':
+ if (str[i + 1] != 'B' && str[i + 1] != 'b')
+ goto err;
+ unit = K;
+ str[i] = '\0';
+ break;
+ case 'M':
+ case 'm':
+ if (str[i + 1] != 'B' && str[i + 1] != 'b')
+ goto err;
+ unit = K * K;
+ str[i] = '\0';
+ break;
+ case 'G':
+ case 'g':
+ if (str[i + 1] != 'B' && str[i + 1] != 'b')
+ goto err;
+ unit = K * K * K;
+ str[i] = '\0';
+ break;
+ case '\0': /* only specified figures */
+ unit = 1;
+ break;
+ default:
+ if (!isdigit(str[i]))
+ goto err;
+ break;
+ }
+ }
+
+ length = atoi(str) * unit;
+ goto end;
+
+err:
+ fprintf(stderr, "Invalid length:%s\n", str);
+end:
+ free(str);
+ return length;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ void *dst, *src;
+ struct timeval start, stop, diff;
+ clockcycle_t clock_start = 0, clock_diff = 0;
+ size_t length;
+ double bps = 0.0;
+
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ /*
+ * Caution!
+ * Without the statement
+ * gettimeofday(&diff, NULL);
+ * compiler warns (and build environment of perf regards it as error)
+ * like this,
+ * bench/mem-memcpy.c:93: error: ‘diff.tv_sec’ may be\
+ * used uninitialized in this function
+ * bench/mem-memcpy.c:93: error: ‘diff.tv_usec’ may be\
+ * used uninitialized in this function
+ *
+ * hmm...
+ */
+ gettimeofday(&diff, NULL);
+
+ length = str2length(length_str);
+ if ((int)length < 0)
+ return 1;
+
+ for (i = 0; routines[i].name; i++)
+ if (!strcmp(routines[i].name, routine))
+ break;
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++)
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char));
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+
+ if (use_clockcycle) {
+ clock_start = get_clock();
+ } else {
+ gettimeofday(&start, NULL);
+ }
+
+ routines[i].fn(dst, src, length);
+
+ if (use_clockcycle) {
+ clock_diff = get_clock() - clock_start;
+ } else {
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+ bps = (double)((double)length / timeval2double(&diff));
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clockcycle)
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ else
+ if (bps < K)
+ printf(" %14lf B/Sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lfd KB/Sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+ else
+ printf(" %14lf GB/Sec\n",
+ bps / 1024 / 1024 / 1024);
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (use_clockcycle)
+ printf("%lf\n",
+ (double)clock_diff / (double)length);
+ else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
--
1.6.5.2

2009-11-13 04:23:14

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 2/4] perf bench: Modify bench/bench.h for new prototype: bench_mem_memcpy()

This patch inserts new prototype of bench_mem_memcpy().

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/bench.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d7..1670e32 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
--
1.6.5.2

2009-11-13 04:23:36

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 3/4] perf bench: Modify builtin-bench.c for new subsystem "mem"

This patch modifies builtin-bench.c to make new subsystem
"mem" executable.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/builtin-bench.c | 14 +++++++++++++-
1 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..0e58fdf 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,7 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
- *
+ * mem ... memory access performance
*/

#include "perf.h"
@@ -43,6 +43,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = {
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,7 +62,9 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
NULL }
--
1.6.5.2

2009-11-13 04:23:33

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 4/4] perf bench: Modify Makefile to build new subsystem "mem"

This patch modifies the Makefile to build the new subsystem "mem".
Because "mem" depends on the architecture,
a new option "-D$(uname_M)" was added to CFLAGS.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/Makefile | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index cd42c97..ddae4de 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -207,7 +207,7 @@ ifndef PERF_DEBUG
CFLAGS_OPTIMIZE = -O6
endif

-CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
+CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) -D$(uname_M)
LDFLAGS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS)
ALL_LDFLAGS = $(LDFLAGS)
@@ -428,6 +428,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
--
1.6.5.2

2009-11-13 09:47:06

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c


* Hitoshi Mitake <[email protected]> wrote:

> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/time.h>
> +#include <assert.h>
> +
> +#define K 1024
> +static char *length_str = (char *)"1MB";
> +static char *routine = (char *)"default";

no cast is needed for string literals.

> +static int use_clockcycle = 0;

also, please use the vertical alignment initialization style used in
other builtin-*.c tools.

> +
> +typedef unsigned long int clockcycle_t;

We dont do new typedefs in .c's generally. It should be put into a
header - but i think using u64 would be good too.

> +
> +#ifdef x86_64
> +
> +static inline clockcycle_t get_clock(void)
> +{
> + long int ret;
> +
> + asm("rdtsc; shlq $32, %%rdx;"
> + "orq %%rdx, %%rax;"
> + "movq %%rax, %0;"
> + : "=r" (ret));
> +
> + return ret;
> +}
> +
> +#endif /* x86_64 */

There's full rdtscll implementations in arch/x86/include/asm/tsc.h. They
should either be included - or copied in part.

> +static const struct option options[] = {
> + OPT_STRING('l', "length", &length_str, "1MB",
> + "Specify length of memory to copy. "
> + "available unit: B, MB, GB (upper and lower)"),
> + OPT_STRING('r', "routine", &routine, "default",
> + "Specify routine to copy"),
> +#ifdef x86_64
> + /*
> + * TODO: This should be expanded to any architecuture
> + * perf supports
> + */
> + OPT_BOOLEAN('c', "clockcycle", &use_clockcycle,
> + "Use CPU's clock cycle for measurement"),
> +#endif /* x86_64 */

That #ifdef x86_64 looks quite ugly.

Also, why not use the 'cycles' perf event to retrieve cycles?

> + OPT_END()
> +};
> +
> +struct routine {
> + const char *name;
> + void * (*fn)(void *dst, const void *src, size_t len);
> + const char *desc;

We try to align structure definitions vertically too.

> +};
> +
> +struct routine routines[] = {
> + { "default",
> + memcpy,
> + "Default memcpy() provided by glibc" },
> + { NULL,
> + NULL,
> + NULL }

{ NULL, } would be equivalent i guess.

> +};
> +
> +static const char * const bench_mem_memcpy_usage[] = {
> + "perf bench mem memcpy <options>",
> + NULL
> +};
> +
> +static size_t str2length(char *_str)
> +{
> + int i, unit = 1;
> + char *str;
> + size_t length = -1;
> +
> + str = calloc(strlen(_str) + 1, sizeof(char));
> + assert(str);
> + strcpy(str, _str);
> +
> + if (!isdigit(str[0]))
> + goto err;
> +
> + for (i = 1; i < (int)strlen(str); i++) {

if 'i' was of type unsigned long then the (int) cast wouldnt be needed i
suspect?

> + switch ((int)str[i]) {

is the cast to 'int' needed here?

> + case 'B':
> + case 'b':
> + str[i] = '\0';
> + break;
> + case 'K':
> + case 'k':
> + if (str[i + 1] != 'B' && str[i + 1] != 'b')
> + goto err;
> + unit = K;
> + str[i] = '\0';
> + break;
> + case 'M':
> + case 'm':
> + if (str[i + 1] != 'B' && str[i + 1] != 'b')
> + goto err;
> + unit = K * K;
> + str[i] = '\0';
> + break;
> + case 'G':
> + case 'g':
> + if (str[i + 1] != 'B' && str[i + 1] != 'b')
> + goto err;
> + unit = K * K * K;
> + str[i] = '\0';
> + break;
> + case '\0': /* only specified figures */
> + unit = 1;
> + break;
> + default:
> + if (!isdigit(str[i]))
> + goto err;
> + break;
> + }
> + }
> +
> + length = atoi(str) * unit;
> + goto end;
> +
> +err:
> + fprintf(stderr, "Invalid length:%s\n", str);
> +end:
> + free(str);
> + return length;
> +}

This should go into a utils/*.c helper file i suspect.

> +
> +static double timeval2double(struct timeval *ts)
> +{
> + return (double)ts->tv_sec +
> + (double)ts->tv_usec / (double)1000000;
> +}
> +
> +int bench_mem_memcpy(int argc, const char **argv,
> + const char *prefix __used)
> +{
> + int i;
> + void *dst, *src;
> + struct timeval start, stop, diff;
> + clockcycle_t clock_start = 0, clock_diff = 0;
> + size_t length;
> + double bps = 0.0;
> +
> + argc = parse_options(argc, argv, options,
> + bench_mem_memcpy_usage, 0);
> +
> + /*
> + * Caution!
> + * Without the statement
> + * gettimeofday(&diff, NULL);
> + * compiler warns (and build environment of perf regards it as error)
> + * like this,
> + * bench/mem-memcpy.c:93: error: ‘diff.tv_sec’ may be\
> + * used uninitialized in this function
> + * bench/mem-memcpy.c:93: error: ‘diff.tv_usec’ may be\
> + * used uninitialized in this function
> + *
> + * hmm...
> + */
> + gettimeofday(&diff, NULL);

well, because 'gettimeofday' could fail in theory and then 'diff'
remains uninitialized. Initializing it would solve that.

> +
> + length = str2length(length_str);
> + if ((int)length < 0)
> + return 1;

str2length should return a proper type instead of forcing a (int) cast
here.

> +
> + for (i = 0; routines[i].name; i++)
> + if (!strcmp(routines[i].name, routine))
> + break;

Please use { } curly braces around all non-single-line statements. I.e.
the above should be:

for (i = 0; routines[i].name; i++) {
if (!strcmp(routines[i].name, routine))
break;
}

It's a tiny bit longer but more robust.

> + if (!routines[i].name) {
> + printf("Unknown routine:%s\n", routine);
> + printf("Available routines...\n");
> + for (i = 0; routines[i].name; i++)
> + printf("\t%s ... %s\n",
> + routines[i].name, routines[i].desc);
> + return 1;
> + }
> +
> + dst = calloc(length, sizeof(char));
> + assert(dst);
> + src = calloc(length, sizeof(char));
> + assert(src);

Please use BUG_ON() - we try to standardize on kernel code style in perf
tooling.

> +
> + if (bench_format == BENCH_FORMAT_DEFAULT)
> + printf("# Copying %s Bytes from %p to %p ...\n\n",
> + length_str, src, dst);

curly braces needed.

> +
> + if (use_clockcycle) {
> + clock_start = get_clock();
> + } else {
> + gettimeofday(&start, NULL);
> + }

these curly braces are not needed. (but this code would probably go away
if the code used perf events to retrieve cycles or time of day elapsed
time.)

> +
> + routines[i].fn(dst, src, length);
> +
> + if (use_clockcycle) {
> + clock_diff = get_clock() - clock_start;
> + } else {
> + gettimeofday(&stop, NULL);
> + timersub(&stop, &start, &diff);
> + bps = (double)((double)length / timeval2double(&diff));
> + }
> +
> + switch (bench_format) {
> + case BENCH_FORMAT_DEFAULT:
> + if (use_clockcycle)
> + printf(" %14lf Clock/Byte\n",
> + (double)clock_diff / (double)length);
> + else
> + if (bps < K)
> + printf(" %14lf B/Sec\n", bps);
> + else if (bps < K * K)
> + printf(" %14lfd KB/Sec\n", bps / 1024);
> + else if (bps < K * K * K)
> + printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
> + else
> + printf(" %14lf GB/Sec\n",
> + bps / 1024 / 1024 / 1024);

curly braces needed.

> + break;
> + case BENCH_FORMAT_SIMPLE:
> + if (use_clockcycle)
> + printf("%lf\n",
> + (double)clock_diff / (double)length);
> + else
> + printf("%lf\n", bps);
> + break;
> + default:
> + /* reaching here is something disaster */
> + fprintf(stderr, "Unknown format:%d\n", bench_format);

could use pr_err() here i guess.

> + exit(1);
> + break;
> + }
> +
> + return 0;
> +}

Thanks,

Ingo

2009-11-15 03:50:26

by Hitoshi Mitake

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c

From: Ingo Molnar <[email protected]>
Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c
Date: Fri, 13 Nov 2009 10:46:50 +0100

Thanks for your detailed review, Ingo.
I'll fix the points you mentioned, and,

> > + case 'B':
> > + case 'b':
> > + str[i] = '\0';
> > + break;
> > + case 'K':
> > + case 'k':
> > + if (str[i + 1] != 'B' && str[i + 1] != 'b')
> > + goto err;
> > + unit = K;
> > + str[i] = '\0';
> > + break;
> > + case 'M':
> > + case 'm':
> > + if (str[i + 1] != 'B' && str[i + 1] != 'b')
> > + goto err;
> > + unit = K * K;
> > + str[i] = '\0';
> > + break;
> > + case 'G':
> > + case 'g':
> > + if (str[i + 1] != 'B' && str[i + 1] != 'b')
> > + goto err;
> > + unit = K * K * K;
> > + str[i] = '\0';
> > + break;
> > + case '\0': /* only specified figures */
> > + unit = 1;
> > + break;
> > + default:
> > + if (!isdigit(str[i]))
> > + goto err;
> > + break;
> > + }
> > + }
> > +
> > + length = atoi(str) * unit;
> > + goto end;
> > +
> > +err:
> > + fprintf(stderr, "Invalid length:%s\n", str);
> > +end:
> > + free(str);
> > + return length;
> > +}
>
> This should go into a utils/*.c helper file i suspect.
>

Before posting the patch series related to mem-memcpy.c,
I'll send a patch that adds the function you said should go into util/,
because I think it is a topic independent of mem-memcpy.c.

Thanks
Hitoshi

2009-11-17 11:19:29

by Hitoshi Mitake

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c

From: Ingo Molnar <[email protected]>
Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c
Date: Fri, 13 Nov 2009 10:46:50 +0100

I've fixed mem-memcpy.c according to your comment,
but I also have some comments.

>
> > +};
> > +
> > +struct routine routines[] = {
> > + { "default",
> > + memcpy,
> > + "Default memcpy() provided by glibc" },
> > + { NULL,
> > + NULL,
> > + NULL }
>
> { NULL, } would be equivalent i guess.

Initializing the termination with { NULL, } causes build error.
So I can't change this point.

> > + break;
> > + case BENCH_FORMAT_SIMPLE:
> > + if (use_clockcycle)
> > + printf("%lf\n",
> > + (double)clock_diff / (double)length);
> > + else
> > + printf("%lf\n", bps);
> > + break;
> > + default:
> > + /* reaching here is something disaster */
> > + fprintf(stderr, "Unknown format:%d\n", bench_format);
>
> could use pr_err() here i guess.

It seems that pr_err() is a wrapper for printk(),
so I can't use pr_err() in perf.

I'll send the patch later. Thanks for your comments!
Hitoshi

2009-11-17 11:21:59

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c


* Hitoshi Mitake <[email protected]> wrote:

> From: Ingo Molnar <[email protected]>
> Subject: Re: [PATCH 1/4] perf bench: Add new subsystem and new suite, bench/mem-memcpy.c
> Date: Fri, 13 Nov 2009 10:46:50 +0100
>
> I've fixed mem-memcpy.c according to your comment,
> but I also have some comments.
>
> >
> > > +};
> > > +
> > > +struct routine routines[] = {
> > > + { "default",
> > > + memcpy,
> > > + "Default memcpy() provided by glibc" },
> > > + { NULL,
> > > + NULL,
> > > + NULL }
> >
> > { NULL, } would be equivalent i guess.
>
> Initializing the termination with { NULL, } causes build error.
> So I can't change this point.

ah, ok.

>
> > > + break;
> > > + case BENCH_FORMAT_SIMPLE:
> > > + if (use_clockcycle)
> > > + printf("%lf\n",
> > > + (double)clock_diff / (double)length);
> > > + else
> > > + printf("%lf\n", bps);
> > > + break;
> > > + default:
> > > + /* reaching here is something disaster */
> > > + fprintf(stderr, "Unknown format:%d\n", bench_format);
> >
> > could use pr_err() here i guess.
>
> It seems that pr_err() is a wrapper for printk(),
> so I can't use pr_err() in perf.

ok.

Ingo

2009-11-17 11:42:42

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark

This patch adds new file mem-memcpy.c.
mem-memcpy.c is benchmark suite for measuring memcpy() performance.

This patch is version 2.
This version of mem-memcpy.c uses the perf event system call to
obtain clock cycles.

Example of use:

| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f0bf417d010 to 0x7f0c3417e010 ...
|
| 830.937491 MB/Sec
| % perf bench mem memcpy -l 1GB -c
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f113fbf6010 to 0x7f117fbf7010 ...
|
| 3.259315 Clock/Byte

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/mem-memcpy.c | 196 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 196 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/bench/mem-memcpy.c

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..21a0f00
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Based on memcpy.c by Ma Ling <[email protected]>
+ * http://marc.info/?l=linux-kernel&m=125792321123782&w=2
+ * This memcpy.c is posted to LKML by Ma Ling for comparing
+ * two ways of memory copying.
+ * The thread is started from
+ * http://marc.info/?l=linux-kernel&m=125750023424093&w=2
+ *
+ * Ported to perf by Hitoshi Mitake <[email protected]>
+ *
+ */
+
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int use_clock = 0;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine {
+ const char *name;
+ const char *desc;
+ void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+struct routine routines[] = {
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd;
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str);
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s, %lu\n", length_str, length);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char));
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) {
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length);
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff));
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else {
+ if (bps < K)
+ printf(" %14lf B/Sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lfd KB/Sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/Sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
--
1.6.5.2

2009-11-17 11:42:38

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 2/4] perf bench: Modify bench.h for prototype of bench_mem_memcpy()

This patch modifies bench.h to add the prototype of new function
bench_mem_memcpy(), memcpy() benchmark suite.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/bench.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d7..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
--
1.6.5.2

2009-11-17 11:43:03

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 3/4] perf bench: Modify builtin-bench.c for new subsystem "mem"

This patch modifies builtin-bench.c so that it can
invoke the suites of the "mem" subsystem.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/builtin-bench.c | 15 ++++++++++++++-
1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/

@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = {
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};

static void dump_suites(int subsys_index)
--
1.6.5.2

2009-11-17 11:42:40

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

This patch modifies Makefile to build new source file mem-memcpy.c,
benchmark suite for memcpy() performance.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/Makefile | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index cd42c97..9b25bca 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -428,6 +428,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
--
1.6.5.2

2009-11-17 12:53:19

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark


FYI, it fails to build on 32-bit systems, with:

bench/mem-memcpy.c: In function 'bench_mem_memcpy':
bench/mem-memcpy.c:118: error: format '%lu' expects type 'long unsigned int', but argument 4 has type 'size_t'

Ingo

2009-11-17 14:36:36

by Hitoshi Mitake

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark

From: Ingo Molnar <[email protected]>
Subject: Re: [PATCH 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark
Date: Tue, 17 Nov 2009 13:53:15 +0100

>
> FYI, it fails to build on 32-bit systems, with:
>
> bench/mem-memcpy.c: In function 'bench_mem_memcpy':
> bench/mem-memcpy.c:118: error: format '%lu' expects type 'long unsigned int', but argument 4 has type 'size_t'

Sorry, I'll fix it.
# Passing this length as an argument to fprintf() makes no sense.
# I wonder why I added it...

2009-11-17 15:09:19

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark

This patch adds the new file mem-memcpy.c.
mem-memcpy.c is a benchmark suite for measuring memcpy() performance.

This patch is version 2.
This version of mem-memcpy.c uses the perf event system call to
obtain the CPU clock cycle count.

Example of use:

| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f0bf417d010 to 0x7f0c3417e010 ...
|
| 830.937491 MB/Sec
| % perf bench mem memcpy -l 1GB -c
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f113fbf6010 to 0x7f117fbf7010 ...
|
| 3.259315 Clock/Byte

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/mem-memcpy.c | 196 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 196 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/bench/mem-memcpy.c

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..13266da
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Based on memcpy.c by Ma Ling <[email protected]>
+ * http://marc.info/?l=linux-kernel&m=125792321123782&w=2
+ * This memcpy.c is posted to LKML by Ma Ling for comparing
+ * two ways of memory copying.
+ * The thread is started from
+ * http://marc.info/?l=linux-kernel&m=125750023424093&w=2
+ *
+ * Ported to perf by Hitoshi Mitake <[email protected]>
+ *
+ */
+
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int use_clock = 0;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine {
+ const char *name;
+ const char *desc;
+ void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+struct routine routines[] = {
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd;
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str);
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char));
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) {
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length);
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff));
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else {
+ if (bps < K)
+ printf(" %14lf B/sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lfd KB/sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
--
1.6.5.2

2009-11-17 15:09:16

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 2/4] perf bench: Modify bench.h for prototype of bench_mem_memcpy()

This patch modifies bench.h to add the prototype of the new function
bench_mem_memcpy(), the memcpy() benchmark suite.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/bench.h | 10 +---------
1 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c64205..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
@@ -13,13 +14,4 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);

extern int bench_format;

-#define BENCH_FORMAT_DEFAULT_STR "default"
-#define BENCH_FORMAT_DEFAULT 0
-#define BENCH_FORMAT_SIMPLE_STR "simple"
-#define BENCH_FORMAT_SIMPLE 1
-
-#define BENCH_FORMAT_UNKNOWN -1
-
-extern int bench_format;
-
#endif
--
1.6.5.2

2009-11-17 15:09:38

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 3/4] perf bench: Modify builtin-bench.c for new subsystem "mem"

This patch modifies builtin-bench.c so that it can
invoke suites of the "mem" subsystem.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/builtin-bench.c | 15 ++++++++++++++-
1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/

@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = {
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};

static void dump_suites(int subsys_index)
--
1.6.5.2

2009-11-17 15:09:28

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

This patch modifies the Makefile to build the new source file mem-memcpy.c,
a benchmark suite for memcpy() performance.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/Makefile | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 5d1a8b0..b356987 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -430,6 +430,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
--
1.6.5.2

2009-11-17 15:11:36

by Hitoshi Mitake

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

From: Hitoshi Mitake <[email protected]>
Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c
Date: Wed, 18 Nov 2009 00:09:10 +0900

VERY SORRY!
I forgot to change the version number of this patch series...
Please discard this series. I'll send a newer one later.
Hitoshi

2009-11-17 15:15:10

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark

This patch adds the new file mem-memcpy.c.
mem-memcpy.c is a benchmark suite for measuring memcpy() performance.

This patch is version 2.
This version of mem-memcpy.c uses the perf event system call to
obtain the CPU clock cycle count.

Example of use:

| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f0bf417d010 to 0x7f0c3417e010 ...
|
| 830.937491 MB/Sec
| % perf bench mem memcpy -l 1GB -c
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f113fbf6010 to 0x7f117fbf7010 ...
|
| 3.259315 Clock/Byte

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/mem-memcpy.c | 196 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 196 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/bench/mem-memcpy.c

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..13266da
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Based on memcpy.c by Ma Ling <[email protected]>
+ * http://marc.info/?l=linux-kernel&m=125792321123782&w=2
+ * This memcpy.c is posted to LKML by Ma Ling for comparing
+ * two ways of memory copying.
+ * The thread is started from
+ * http://marc.info/?l=linux-kernel&m=125750023424093&w=2
+ *
+ * Ported to perf by Hitoshi Mitake <[email protected]>
+ *
+ */
+
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int use_clock = 0;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine {
+ const char *name;
+ const char *desc;
+ void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+struct routine routines[] = {
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd;
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str);
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char));
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) {
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length);
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff));
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else {
+ if (bps < K)
+ printf(" %14lf B/sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lfd KB/sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
--
1.6.5.2

2009-11-17 15:15:07

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 2/4] perf bench: Modify bench.h for prototype of bench_mem_memcpy()

This patch modifies bench.h to add the prototype of the new function
bench_mem_memcpy(), the memcpy() benchmark suite.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/bench.h | 10 +---------
1 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c64205..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
@@ -13,13 +14,4 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);

extern int bench_format;

-#define BENCH_FORMAT_DEFAULT_STR "default"
-#define BENCH_FORMAT_DEFAULT 0
-#define BENCH_FORMAT_SIMPLE_STR "simple"
-#define BENCH_FORMAT_SIMPLE 1
-
-#define BENCH_FORMAT_UNKNOWN -1
-
-extern int bench_format;
-
#endif
--
1.6.5.2

2009-11-17 15:15:11

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 3/4] perf bench: Modify builtin-bench.c for new subsystem "mem"

This patch modifies builtin-bench.c so that it can
invoke suites of the "mem" subsystem.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/builtin-bench.c | 15 ++++++++++++++-
1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/

@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = {
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};

static void dump_suites(int subsys_index)
--
1.6.5.2

2009-11-17 15:15:36

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

This patch modifies the Makefile to build the new source file mem-memcpy.c,
a benchmark suite for memcpy() performance.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/Makefile | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 5d1a8b0..b356987 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -430,6 +430,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
--
1.6.5.2

2009-11-17 15:18:04

by Hitoshi Mitake

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

From: Hitoshi Mitake <[email protected]>
Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c
Date: Wed, 18 Nov 2009 00:15:03 +0900

VEERRRYYY SORRY...

I sent old files...
I'll send new ones later. It's time to sleep...

Hitoshi

2009-11-17 15:20:25

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v3 1/4] perf bench: Add mem-memcpy.c:memcpy() benchmark

This patch adds the new file mem-memcpy.c.
mem-memcpy.c is a benchmark suite for measuring memcpy() performance.

This patch is version 3.
The compile error reported by Ingo:
bench/mem-memcpy.c: In function 'bench_mem_memcpy':
bench/mem-memcpy.c:118: error: format '%lu' expects type 'long unsigned int', but argument 4 has type 'size_t'
on the x86_32 environment has been fixed.

Example of use:

| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f0bf417d010 to 0x7f0c3417e010 ...
|
| 830.937491 MB/Sec
| % perf bench mem memcpy -l 1GB -c
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f113fbf6010 to 0x7f117fbf7010 ...
|
| 3.259315 Clock/Byte

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/mem-memcpy.c | 196 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 196 insertions(+), 0 deletions(-)
create mode 100644 tools/perf/bench/mem-memcpy.c

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..27822ec
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Based on memcpy.c by Ma Ling <[email protected]>
+ * http://marc.info/?l=linux-kernel&m=125792321123782&w=2
+ * This memcpy.c is posted to LKML by Ma Ling for comparing
+ * two ways of memory copying.
+ * The thread is started from
+ * http://marc.info/?l=linux-kernel&m=125750023424093&w=2
+ *
+ * Ported to perf by Hitoshi Mitake <[email protected]>
+ *
+ */
+
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int use_clock = 0;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine {
+ const char *name;
+ const char *desc;
+ void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+struct routine routines[] = {
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd;
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str);
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char));
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) {
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length);
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff));
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else {
+ if (bps < K)
+ printf(" %14lf B/Sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lfd KB/Sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/Sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
--
1.6.5.2

2009-11-17 15:20:33

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v3 2/4] perf bench: Modify bench.h for prototype of bench_mem_memcpy()

This patch modifies bench.h to add the prototype of the new function
bench_mem_memcpy(), the memcpy() benchmark suite.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/bench/bench.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d7..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
--
1.6.5.2

2009-11-17 15:20:34

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v3 3/4] perf bench: Modify builtin-bench.c for new subsystem "mem"

This patch modifies builtin-bench.c so that it can
invoke suites of the "mem" subsystem.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/builtin-bench.c | 15 ++++++++++++++-
1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/

@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = {
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};

static void dump_suites(int subsys_index)
--
1.6.5.2

2009-11-17 15:20:51

by Hitoshi Mitake

[permalink] [raw]
Subject: [PATCH v3 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

This patch modifies the Makefile to build the new source file mem-memcpy.c,
a benchmark suite for memcpy() performance.

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
---
tools/perf/Makefile | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 5d1a8b0..b356987 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -430,6 +430,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
--
1.6.5.2

2009-11-17 15:37:44

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c


* Hitoshi Mitake <[email protected]> wrote:

> From: Hitoshi Mitake <[email protected]>
> Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c
> Date: Wed, 18 Nov 2009 00:15:03 +0900
>
> VEERRRYYY SORRY...
>
> I sent old files...
> I'll send new ones later. It's time to sleep...

No problem - i'll sort it out.

Ingo

2009-11-17 15:42:49

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c


* Ingo Molnar <[email protected]> wrote:

> * Hitoshi Mitake <[email protected]> wrote:
>
> > From: Hitoshi Mitake <[email protected]>
> > Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c
> > Date: Wed, 18 Nov 2009 00:15:03 +0900
> >
> > VEERRRYYY SORRY...
> >
> > I sent old files...
> > I'll send new ones later. It's time to sleep...
>
> No problem - i'll sort it out.

Looks good here - i've applied it.

Note, i've created a single commit out of the 4 commits - there's no
reason to split up the change into 4 different parts.

Ingo

2009-11-17 15:43:36

by Hitoshi Mitake

[permalink] [raw]
Subject: [tip:perf/core] perf bench: Add memcpy() benchmark

Commit-ID: a123296c34c2b60bced7538ab005e2edf1275aa8
Gitweb: http://git.kernel.org/tip/a123296c34c2b60bced7538ab005e2edf1275aa8
Author: Hitoshi Mitake <[email protected]>
AuthorDate: Wed, 18 Nov 2009 00:20:09 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 17 Nov 2009 16:40:16 +0100

perf bench: Add memcpy() benchmark

'perf bench mem memcpy' is a benchmark suite for measuring memcpy()
performance.

Example:

| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f0bf417d010 to 0x7f0c3417e010 ...
|
| 830.937491 MB/Sec
|
| % perf bench mem memcpy -l 1GB -c
| # Running mem/memcpy benchmark...
| # Copying 1GB Bytes from 0x7f113fbf6010 to 0x7f117fbf7010 ...
|
| 3.259315 Clock/Byte

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ling Ma <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile | 1 +
tools/perf/bench/bench.h | 1 +
tools/perf/bench/mem-memcpy.c | 196 +++++++++++++++++++++++++++++++++++++++++
tools/perf/builtin-bench.c | 15 +++-
4 files changed, 212 insertions(+), 1 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 5d1a8b0..b356987 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -430,6 +430,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d7..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..27822ec
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Based on memcpy.c by Ma Ling <[email protected]>
+ * http://marc.info/?l=linux-kernel&m=125792321123782&w=2
+ * This memcpy.c is posted to LKML by Ma Ling for comparing
+ * two ways of memory copying.
+ * The thread is started from
+ * http://marc.info/?l=linux-kernel&m=125750023424093&w=2
+ *
+ * Ported to perf by Hitoshi Mitake <[email protected]>
+ *
+ */
+
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024 /* step between the B, KB, MB and GB thresholds used when printing bytes/sec */
+
+static const char *length_str = "1MB"; /* -l: amount of memory to copy, kept as the user typed it */
+static const char *routine = "default"; /* -r: name of the copy routine to run */
+static int use_clock = 0; /* -c: measure with the CPU clock instead of gettimeofday() */
+
+static const struct option options[] = { /* command-line options accepted by this suite */
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine { /* one selectable copy implementation */
+ const char *name; /* compared against the -r argument */
+ const char *desc; /* printed when the available routines are listed */
+ void * (*fn)(void *dst, const void *src, size_t len); /* same signature as memcpy() */
+};
+
+struct routine routines[] = { /* lookup table, terminated by an all-NULL entry */
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = { /* usage text handed to parse_options() */
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd; /* perf event descriptor: set by init_clock(), read by get_clock() */
+
+static struct perf_event_attr clock_attr = { /* selects the hardware CPU-cycles event */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void) /* open the event described by clock_attr and keep its fd in clock_fd */
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); /* opened for this process; remaining args presumably cpu/group_fd/flags - TODO confirm */
+ BUG_ON(clock_fd < 0); /* BUG_ON check: there is no fallback when the event cannot be opened */
+}
+
+static u64 get_clock(void) /* return the u64 value currently readable from clock_fd */
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64)); /* BUG_ON check: anything but a full u64 read is rejected */
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts) /* convert a struct timeval to seconds as a double */
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000; /* tv_usec contributes the fractional seconds */
+}
+
+int bench_mem_memcpy(int argc, const char **argv, /* "perf bench mem memcpy": time one copy of -l bytes with the -r routine */
+ const char *prefix __used) /* returns 0 on success, 1 on an invalid length or unknown routine */
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str); /* turn the -l string into a byte count */
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) { /* find the table entry named by -r */
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) { /* hit the NULL terminator: no such routine, list the valid ones */
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char)); /* both buffers come back zero-filled from calloc() */
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) { /* -c: sample the cycle counter, otherwise take wall-clock time */
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length); /* the single copy being measured */
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff)); /* bytes per second */
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else { /* scale to the largest unit that keeps the value >= 1 */
+ if (bps < K)
+ printf(" %14lf B/Sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lf KB/Sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/Sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE: /* bare number only: clock/byte with -c, else unscaled bytes/sec */
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here means something went badly wrong */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0; /* NOTE(review): dst and src are not freed before returning */
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/

@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = { /* suites of the "mem" subsystem, ended by an all-NULL entry */
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};

static void dump_suites(int subsys_index)

2009-11-18 01:41:58

by Hitoshi Mitake

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c

From: Ingo Molnar <[email protected]>
Subject: Re: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c
Date: Tue, 17 Nov 2009 16:42:40 +0100

>
> * Ingo Molnar <[email protected]> wrote:
>
> > * Hitoshi Mitake <[email protected]> wrote:
> >
> > > From: Hitoshi Mitake <[email protected]>
> > > Subject: [PATCH v2 4/4] perf bench: Modify Makefile for new source file mem-memcpy.c
> > > Date: Wed, 18 Nov 2009 00:15:03 +0900
> > >
> > > VEERRRYYY SORRY...
> > >
> > > I sent old files...
> > > I'll send new ones later. It's time to sleep...
> >
> > No problem - i'll sort it out.
>
> Looks good here - i've applied it.
>
> Note, i've created a single commit out of the 4 commits - there's no
> reason to split up the change into 4 different parts.

Thanks.
From next time, I'll send a single patch when the change is small enough.

Hitoshi

2009-11-19 05:37:22

by Hitoshi Mitake

[permalink] [raw]
Subject: [tip:perf/bench] perf bench: Add memcpy() benchmark

Commit-ID: 827f3b4974c5db2968d4979fe6a0ae00ab37bdd8
Gitweb: http://git.kernel.org/tip/827f3b4974c5db2968d4979fe6a0ae00ab37bdd8
Author: Hitoshi Mitake <[email protected]>
AuthorDate: Wed, 18 Nov 2009 00:20:09 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 19 Nov 2009 06:21:48 +0100

perf bench: Add memcpy() benchmark

'perf bench mem memcpy' is a benchmark suite for measuring memcpy()
performance.

Example on an Intel(R) Core(TM)2 Duo CPU E6850 @ 3.00GHz:

| % perf bench mem memcpy -l 1GB
| # Running mem/memcpy benchmark...
| # Copying 1MB Bytes from 0xb7d98008 to 0xb7e99008 ...
|
| 726.216412 MB/Sec

Signed-off-by: Hitoshi Mitake <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
[ v2: updated changelog, clarified history of builtin-bench.c ]
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile | 1 +
tools/perf/bench/bench.h | 1 +
tools/perf/bench/mem-memcpy.c | 186 +++++++++++++++++++++++++++++++++++++++++
tools/perf/builtin-bench.c | 15 +++-
4 files changed, 202 insertions(+), 1 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3f0666a..53e663a 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -432,6 +432,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o

BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d7..f7781c6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@

extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..d4f4f98
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,186 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <[email protected]>
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024 /* step between the B, KB, MB and GB thresholds used when printing bytes/sec */
+
+static const char *length_str = "1MB"; /* -l: amount of memory to copy, kept as the user typed it */
+static const char *routine = "default"; /* -r: name of the copy routine to run */
+static int use_clock = 0; /* -c: measure with the CPU clock instead of gettimeofday() */
+
+static const struct option options[] = { /* command-line options accepted by this suite */
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine { /* one selectable copy implementation */
+ const char *name; /* compared against the -r argument */
+ const char *desc; /* printed when the available routines are listed */
+ void * (*fn)(void *dst, const void *src, size_t len); /* same signature as memcpy() */
+};
+
+struct routine routines[] = { /* lookup table, terminated by an all-NULL entry */
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = { /* usage text handed to parse_options() */
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static int clock_fd; /* perf event descriptor: set by init_clock(), read by get_clock() */
+
+static struct perf_event_attr clock_attr = { /* selects the hardware CPU-cycles event */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void) /* open the event described by clock_attr and keep its fd in clock_fd */
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); /* opened for this process; remaining args presumably cpu/group_fd/flags - TODO confirm */
+ BUG_ON(clock_fd < 0); /* BUG_ON check: there is no fallback when the event cannot be opened */
+}
+
+static u64 get_clock(void) /* return the u64 value currently readable from clock_fd */
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64)); /* BUG_ON check: anything but a full u64 read is rejected */
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts) /* convert a struct timeval to seconds as a double */
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000; /* tv_usec contributes the fractional seconds */
+}
+
+int bench_mem_memcpy(int argc, const char **argv, /* "perf bench mem memcpy": time one copy of -l bytes with the -r routine */
+ const char *prefix __used) /* returns 0 on success, 1 on an invalid length or unknown routine */
+{
+ int i;
+ void *dst, *src;
+ size_t length;
+ double bps = 0.0;
+ struct timeval tv_start, tv_end, tv_diff;
+ u64 clock_start, clock_end, clock_diff;
+
+ clock_start = clock_end = clock_diff = 0ULL;
+ argc = parse_options(argc, argv, options,
+ bench_mem_memcpy_usage, 0);
+
+ tv_diff.tv_sec = 0;
+ tv_diff.tv_usec = 0;
+ length = (size_t)perf_atoll((char *)length_str); /* turn the -l string into a byte count */
+ if ((long long int)length <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ for (i = 0; routines[i].name; i++) { /* find the table entry named by -r */
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) { /* hit the NULL terminator: no such routine, list the valid ones */
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ dst = calloc(length, sizeof(char)); /* both buffers come back zero-filled from calloc() */
+ assert(dst);
+ src = calloc(length, sizeof(char));
+ assert(src);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# Copying %s Bytes from %p to %p ...\n\n",
+ length_str, src, dst);
+ }
+
+ if (use_clock) { /* -c: sample the cycle counter, otherwise take wall-clock time */
+ init_clock();
+ clock_start = get_clock();
+ } else
+ BUG_ON(gettimeofday(&tv_start, NULL));
+
+ routines[i].fn(dst, src, length); /* the single copy being measured */
+
+ if (use_clock) {
+ clock_end = get_clock();
+ clock_diff = clock_end - clock_start;
+ } else {
+ BUG_ON(gettimeofday(&tv_end, NULL));
+ timersub(&tv_end, &tv_start, &tv_diff);
+ bps = (double)((double)length / timeval2double(&tv_diff)); /* bytes per second */
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)clock_diff / (double)length);
+ } else { /* scale to the largest unit that keeps the value >= 1 */
+ if (bps < K)
+ printf(" %14lf B/Sec\n", bps);
+ else if (bps < K * K)
+ printf(" %14lf KB/Sec\n", bps / 1024);
+ else if (bps < K * K * K)
+ printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+ else {
+ printf(" %14lf GB/Sec\n",
+ bps / 1024 / 1024 / 1024);
+ }
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE: /* bare number only: clock/byte with -c, else unscaled bytes/sec */
+ if (use_clock) {
+ printf("%14lf\n",
+ (double)clock_diff / (double)length);
+ } else
+ printf("%lf\n", bps);
+ break;
+ default:
+ /* reaching here means something went badly wrong */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0; /* NOTE(review): dst and src are not freed before returning */
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39ba..e043eb8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/

@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};

+static struct bench_suite mem_suites[] = { /* suites of the "mem" subsystem, ended by an all-NULL entry */
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};

static void dump_suites(int subsys_index)