First set of various updates for the perf-bench tool. I am
in the process of adding more benchmarks (ipc & futex) but
wanted to get these changes in early in the -rc cycle.
Patches 3 & 5 depend on the first patch; otherwise the updates
are in no particular order.
Please consider applying.
Thanks!
Davidlohr Bueso (9):
perf bench: Add --repeat option
perf bench: sched-messaging: Redo runtime output
perf bench: sched-messaging: Support multiple runs
perf bench: sched-messaging: Plug memleak
perf bench: futex: Use global --repeat option
perf bench: futex: Replace --silent option with global --format
perf bench: mem: -o and -n options are mutually exclusive
perf bench: sched-messaging: Drop barf()
perf bench: futex: Support operations for shared futexes
tools/perf/Documentation/perf-bench.txt | 4 +
tools/perf/bench/bench.h | 1 +
tools/perf/bench/futex-hash.c | 39 ++++++---
tools/perf/bench/futex-requeue.c | 58 ++++++++-----
tools/perf/bench/futex-wake.c | 52 ++++++-----
tools/perf/bench/mem-memcpy.c | 5 ++
tools/perf/bench/mem-memset.c | 5 ++
tools/perf/bench/sched-messaging.c | 149 ++++++++++++++++++--------------
tools/perf/builtin-bench.c | 7 ++
9 files changed, 196 insertions(+), 124 deletions(-)
--
1.8.1.4
Instead of printing sec and usec individually, simplify
into a 'runtime' variable to later use accordingly. This
is particularly helpful when later adding multiple runs
and collecting statistics.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/sched-messaging.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index cc1190a..df0828a 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -269,9 +269,9 @@ static const char * const bench_sched_message_usage[] = {
int bench_sched_messaging(int argc, const char **argv,
const char *prefix __maybe_unused)
{
- unsigned int i, total_children;
+ unsigned int i, total_children, num_fds = 20;
struct timeval start, stop, diff;
- unsigned int num_fds = 20;
+ unsigned long runtime;
int readyfds[2], wakefds[2];
char dummy;
pthread_t *pth_tab;
@@ -307,8 +307,8 @@ int bench_sched_messaging(int argc, const char **argv,
reap_worker(pth_tab[i]);
gettimeofday(&stop, NULL);
-
timersub(&stop, &start, &diff);
+ runtime = (diff.tv_sec * 1e3) + (diff.tv_usec/1e3);
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
@@ -317,13 +317,10 @@ int bench_sched_messaging(int argc, const char **argv,
printf("# %d groups == %d %s run\n\n",
num_groups, num_groups * 2 * num_fds,
thread_mode ? "threads" : "processes");
- printf(" %14s: %lu.%03lu [sec]\n", "Total time",
- diff.tv_sec,
- (unsigned long) (diff.tv_usec/1000));
+ printf(" %14s: %.3f [sec]\n", "Total time", runtime/1e3);
break;
case BENCH_FORMAT_SIMPLE:
- printf("%lu.%03lu\n", diff.tv_sec,
- (unsigned long) (diff.tv_usec/1000));
+ printf("%.3f\n", runtime/1e3);
break;
default:
/* reaching here is something disaster */
--
1.8.1.4
Make use of the new --repeat option in perf-bench to allow
multiple runs. This makes the avg final result much more
useful for users, including displaying statistics.
Also move up the general information output to be shown
before the actual run is done, thus allowing the user to
know what's going on earlier and not getting in the way
of each individual run.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/sched-messaging.c | 103 +++++++++++++++++++++++--------------
1 file changed, 63 insertions(+), 40 deletions(-)
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index df0828a..096ef5a 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -1,16 +1,15 @@
/*
- *
* sched-messaging.c
*
* messaging: Benchmark for scheduler and IPC mechanisms
*
* Based on hackbench by Rusty Russell <[email protected]>
* Ported to perf by Hitoshi Mitake <[email protected]>
- *
*/
#include "../perf.h"
#include "../util/util.h"
+#include "../util/stat.h"
#include "../util/parse-options.h"
#include "../builtin.h"
#include "bench.h"
@@ -35,6 +34,7 @@ static bool use_pipes = false;
static unsigned int loops = 100;
static bool thread_mode = false;
static unsigned int num_groups = 10;
+static struct stats runtime_stats;
struct sender_context {
unsigned int num_fds;
@@ -251,6 +251,28 @@ static unsigned int group(pthread_t *pth,
return num_fds * 2;
}
+static void print_summary(void)
+{
+ double runtime_avg = avg_stats(&runtime_stats);
+ double runtime_stddev = stddev_stats(&runtime_stats);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("\n%14s: %.3f sec (+-%.2f%%)\n",
+ "Avg Total time",
+ runtime_avg/1e3,
+ rel_stddev_stats(runtime_stddev, runtime_avg));
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%.3f\n", runtime_avg/1e3);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
+}
+
static const struct option options[] = {
OPT_BOOLEAN('p', "pipe", &use_pipes,
"Use pipe() instead of socketpair()"),
@@ -269,7 +291,7 @@ static const char * const bench_sched_message_usage[] = {
int bench_sched_messaging(int argc, const char **argv,
const char *prefix __maybe_unused)
{
- unsigned int i, total_children, num_fds = 20;
+ unsigned int i, j, total_children, num_fds = 20;
struct timeval start, stop, diff;
unsigned long runtime;
int readyfds[2], wakefds[2];
@@ -283,51 +305,52 @@ int bench_sched_messaging(int argc, const char **argv,
if (!pth_tab)
barf("main:malloc()");
- fdpair(readyfds);
- fdpair(wakefds);
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("# %d sender and receiver %s per group\n",
+ num_fds, thread_mode ? "threads" : "processes");
+ printf("# %d groups == %d %s run\n\n",
+ num_groups, num_groups * 2 * num_fds,
+ thread_mode ? "threads" : "processes");
+ }
+
+ init_stats(&runtime_stats);
- total_children = 0;
- for (i = 0; i < num_groups; i++)
- total_children += group(pth_tab+total_children, num_fds,
- readyfds[1], wakefds[0]);
+ for (j = 0; j < bench_repeat; j++) {
+ fdpair(readyfds);
+ fdpair(wakefds);
- /* Wait for everyone to be ready */
- for (i = 0; i < total_children; i++)
- if (read(readyfds[0], &dummy, 1) != 1)
- barf("Reading for readyfds");
+ total_children = 0;
+ for (i = 0; i < num_groups; i++)
+ total_children += group(pth_tab+total_children, num_fds,
+ readyfds[1], wakefds[0]);
- gettimeofday(&start, NULL);
+ /* Wait for everyone to be ready */
+ for (i = 0; i < total_children; i++)
+ if (read(readyfds[0], &dummy, 1) != 1)
+ barf("Reading for readyfds");
- /* Kick them off */
- if (write(wakefds[1], &dummy, 1) != 1)
- barf("Writing to start them");
+ gettimeofday(&start, NULL);
- /* Reap them all */
- for (i = 0; i < total_children; i++)
- reap_worker(pth_tab[i]);
+ /* Kick them off */
+ if (write(wakefds[1], &dummy, 1) != 1)
+ barf("Writing to start them");
- gettimeofday(&stop, NULL);
- timersub(&stop, &start, &diff);
- runtime = (diff.tv_sec * 1e3) + (diff.tv_usec/1e3);
+ /* Reap them all */
+ for (i = 0; i < total_children; i++)
+ reap_worker(pth_tab[i]);
- switch (bench_format) {
- case BENCH_FORMAT_DEFAULT:
- printf("# %d sender and receiver %s per group\n",
- num_fds, thread_mode ? "threads" : "processes");
- printf("# %d groups == %d %s run\n\n",
- num_groups, num_groups * 2 * num_fds,
- thread_mode ? "threads" : "processes");
- printf(" %14s: %.3f [sec]\n", "Total time", runtime/1e3);
- break;
- case BENCH_FORMAT_SIMPLE:
- printf("%.3f\n", runtime/1e3);
- break;
- default:
- /* reaching here is something disaster */
- fprintf(stderr, "Unknown format:%d\n", bench_format);
- exit(1);
- break;
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+ runtime = (diff.tv_sec * 1e3) + (diff.tv_usec/1e3);
+ update_stats(&runtime_stats, runtime);
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("[Run %d]: Total Time: %.3f sec\n",
+ j + 1, runtime/1e3);
+
+ usleep(100000);
}
+ print_summary();
return 0;
}
--
1.8.1.4
Explicitly free the thread array ('pth_tab').
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/sched-messaging.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 096ef5a..b519ba4 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -351,6 +351,8 @@ int bench_sched_messaging(int argc, const char **argv,
usleep(100000);
}
+ free(pth_tab);
print_summary();
+
return 0;
}
--
1.8.1.4
Instead of reinventing the wheel, we can use err(3) when dealing
with fatal errors. Exit code is now always EXIT_FAILURE (1).
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/sched-messaging.c | 45 ++++++++++++++++----------------------
1 file changed, 19 insertions(+), 26 deletions(-)
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index b519ba4..bf5645e 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -27,6 +27,7 @@
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
+#include <err.h>
#define DATASIZE 100
@@ -50,12 +51,6 @@ struct receiver_context {
int wakefd;
};
-static void barf(const char *msg)
-{
- fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
- exit(1);
-}
-
static void fdpair(int fds[2])
{
if (use_pipes) {
@@ -66,7 +61,7 @@ static void fdpair(int fds[2])
return;
}
- barf(use_pipes ? "pipe()" : "socketpair()");
+ err(EXIT_FAILURE, use_pipes ? "pipe()" : "socketpair()");
}
/* Block until we're ready to go */
@@ -77,11 +72,11 @@ static void ready(int ready_out, int wakefd)
/* Tell them we're ready. */
if (write(ready_out, &dummy, 1) != 1)
- barf("CLIENT: ready write");
+ err(EXIT_FAILURE, "CLIENT: ready write");
/* Wait for "GO" signal */
if (poll(&pollfd, 1, -1) != 1)
- barf("poll");
+ err(EXIT_FAILURE, "poll");
}
/* Sender sprays loops messages down each file descriptor */
@@ -101,7 +96,7 @@ again:
ret = write(ctx->out_fds[j], data + done,
sizeof(data)-done);
if (ret < 0)
- barf("SENDER: write");
+ err(EXIT_FAILURE, "SENDER: write");
done += ret;
if (done < DATASIZE)
goto again;
@@ -131,7 +126,7 @@ static void *receiver(struct receiver_context* ctx)
again:
ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
if (ret < 0)
- barf("SERVER: read");
+ err(EXIT_FAILURE, "SERVER: read");
done += ret;
if (done < DATASIZE)
goto again;
@@ -144,14 +139,14 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
{
pthread_attr_t attr;
pthread_t childid;
- int err;
+ int ret;
if (!thread_mode) {
/* process mode */
/* Fork the receiver. */
switch (fork()) {
case -1:
- barf("fork()");
+ err(EXIT_FAILURE, "fork()");
break;
case 0:
(*func) (ctx);
@@ -165,19 +160,17 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
}
if (pthread_attr_init(&attr) != 0)
- barf("pthread_attr_init:");
+ err(EXIT_FAILURE, "pthread_attr_init:");
#ifndef __ia64__
if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
- barf("pthread_attr_setstacksize");
+ err(EXIT_FAILURE, "pthread_attr_setstacksize");
#endif
- err = pthread_create(&childid, &attr, func, ctx);
- if (err != 0) {
- fprintf(stderr, "pthread_create failed: %s (%d)\n",
- strerror(err), err);
- exit(-1);
- }
+ ret = pthread_create(&childid, &attr, func, ctx);
+ if (ret != 0)
+ err(EXIT_FAILURE, "pthread_create failed");
+
return childid;
}
@@ -207,14 +200,14 @@ static unsigned int group(pthread_t *pth,
+ num_fds * sizeof(int));
if (!snd_ctx)
- barf("malloc()");
+ err(EXIT_FAILURE, "malloc()");
for (i = 0; i < num_fds; i++) {
int fds[2];
struct receiver_context *ctx = malloc(sizeof(*ctx));
if (!ctx)
- barf("malloc()");
+ err(EXIT_FAILURE, "malloc()");
/* Create the pipe between client and server */
@@ -303,7 +296,7 @@ int bench_sched_messaging(int argc, const char **argv,
pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
if (!pth_tab)
- barf("main:malloc()");
+ err(EXIT_FAILURE, "main:malloc()");
if (bench_format == BENCH_FORMAT_DEFAULT) {
printf("# %d sender and receiver %s per group\n",
@@ -327,13 +320,13 @@ int bench_sched_messaging(int argc, const char **argv,
/* Wait for everyone to be ready */
for (i = 0; i < total_children; i++)
if (read(readyfds[0], &dummy, 1) != 1)
- barf("Reading for readyfds");
+ err(EXIT_FAILURE, "Reading for readyfds");
gettimeofday(&start, NULL);
/* Kick them off */
if (write(wakefds[1], &dummy, 1) != 1)
- barf("Writing to start them");
+ err(EXIT_FAILURE, "Writing to start them");
/* Reap them all */
for (i = 0; i < total_children; i++)
--
1.8.1.4
Unlike futex-hash, requeuing and wakeup benchmarks do not support
shared futexes, limiting the usefulness of the programs. Correct
this, and allow using the local -S parameter. The default remains
using private futexes.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/futex-hash.c | 7 +++++--
tools/perf/bench/futex-requeue.c | 22 ++++++++++++++--------
tools/perf/bench/futex-wake.c | 15 ++++++++++-----
3 files changed, 29 insertions(+), 15 deletions(-)
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 14791eb..5175171 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -26,6 +26,7 @@ static unsigned int nsecs = 10;
/* amount of futexes per thread */
static unsigned int nfutexes = 1024;
static bool fshared = false, done = false;
+static int futex_flag = 0;
struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
@@ -74,8 +75,7 @@ static void *workerfn(void *arg)
* such as internal waitqueue handling, thus enlarging
* the critical region protected by hb->lock.
*/
- ret = futex_wait(&w->futex[i], 1234, NULL,
- fshared ? 0 : FUTEX_PRIVATE_FLAG);
+ ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
if (bench_format == BENCH_FORMAT_DEFAULT &&
(!ret || errno != EAGAIN || errno != EWOULDBLOCK))
warn("Non-expected futex return call");
@@ -147,6 +147,9 @@ int bench_futex_hash(int argc, const char **argv,
if (!worker)
goto errmem;
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
if (bench_format == BENCH_FORMAT_DEFAULT) {
printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 7b211c1..197537b 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -30,15 +30,17 @@ static u_int32_t futex1 = 0, futex2 = 0;
static unsigned int nrequeue = 1;
static pthread_t *worker;
-static bool done = 0;
+static bool done = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats requeuetime_stats, requeued_stats;
static unsigned int ncpus, threads_starting, nthreads = 0;
+static int futex_flag = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
+ OPT_BOOLEAN('S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};
@@ -80,7 +82,7 @@ static void *workerfn(void *arg __maybe_unused)
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);
- futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ futex_wait(&futex1, 0, NULL, futex_flag);
return NULL;
}
@@ -137,10 +139,13 @@ int bench_futex_requeue(int argc, const char **argv,
if (!worker)
err(EXIT_FAILURE, "calloc");
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
if (bench_format == BENCH_FORMAT_DEFAULT) {
- printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
- "%d at a time.\n\n",
- getpid(), nthreads, &futex1, &futex2, nrequeue);
+ printf("Run summary [PID %d]: Requeuing %d threads (from [%s] futex %p to %p), "
+ "%d at a time.\n\n", getpid(), nthreads,
+ fshared ? "shared":"private", &futex1, &futex2, nrequeue);
}
init_stats(&requeued_stats);
@@ -173,8 +178,9 @@ int bench_futex_requeue(int argc, const char **argv,
* Do not wakeup any tasks blocked on futex1, allowing
* us to really measure futex_wait functionality.
*/
- futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue,
- FUTEX_PRIVATE_FLAG);
+ futex_cmp_requeue(&futex1, 0, &futex2, 0,
+ nrequeue, futex_flag);
+
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
@@ -187,7 +193,7 @@ int bench_futex_requeue(int argc, const char **argv,
}
/* everybody should be blocked on futex2, wake'em up */
- nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG);
+ nrequeued = futex_wake(&futex2, nthreads, futex_flag);
if (nthreads != nrequeued)
warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index eae6d09..08f62eb 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -31,15 +31,17 @@ static u_int32_t futex1 = 0;
static unsigned int nwakes = 1;
pthread_t *worker;
-static bool done = false;
+static bool done = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
static unsigned int ncpus, threads_starting, nthreads = 0;
+static int futex_flag = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
+ OPT_BOOLEAN('S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};
@@ -57,7 +59,7 @@ static void *workerfn(void *arg __maybe_unused)
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);
- futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ futex_wait(&futex1, 0, NULL, futex_flag);
return NULL;
}
@@ -140,10 +142,13 @@ int bench_futex_wake(int argc, const char **argv,
if (!worker)
err(EXIT_FAILURE, "calloc");
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
if (bench_format == BENCH_FORMAT_DEFAULT)
- printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
+ printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
"waking up %d at a time.\n\n",
- getpid(), nthreads, &futex1, nwakes);
+ getpid(), nthreads, fshared ? "shared":"private", &futex1, nwakes);
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
@@ -171,7 +176,7 @@ int bench_futex_wake(int argc, const char **argv,
/* Ok, all threads are patiently blocked, start waking folks up */
gettimeofday(&start, NULL);
while (nwoken != nthreads)
- nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
+ nwoken += futex_wake(&futex1, nwakes, futex_flag);
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
--
1.8.1.4
This option is available through perf-bench; use it instead
and free up the local option.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/futex-requeue.c | 10 +---------
tools/perf/bench/futex-wake.c | 12 ++----------
2 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a1625587..732403b 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -29,13 +29,6 @@ static u_int32_t futex1 = 0, futex2 = 0;
*/
static unsigned int nrequeue = 1;
-/*
- * There can be significant variance from run to run,
- * the more repeats, the more exact the overall avg and
- * the better idea of the futex latency.
- */
-static unsigned int repeat = 10;
-
static pthread_t *worker;
static bool done = 0, silent = 0;
static pthread_mutex_t thread_lock;
@@ -46,7 +39,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
- OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_END()
};
@@ -146,7 +138,7 @@ int bench_futex_requeue(int argc, const char **argv,
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);
- for (j = 0; j < repeat && !done; j++) {
+ for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nrequeued = 0;
struct timeval start, end, runtime;
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index d096169..50022cb 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -30,15 +30,8 @@ static u_int32_t futex1 = 0;
*/
static unsigned int nwakes = 1;
-/*
- * There can be significant variance from run to run,
- * the more repeats, the more exact the overall avg and
- * the better idea of the futex latency.
- */
-static unsigned int repeat = 10;
-
pthread_t *worker;
-static bool done = 0, silent = 0;
+static bool done = false, silent = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
@@ -47,7 +40,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
- OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_END()
};
@@ -149,7 +141,7 @@ int bench_futex_wake(int argc, const char **argv,
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);
- for (j = 0; j < repeat && !done; j++) {
+ for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nwoken = 0;
struct timeval start, end, runtime;
--
1.8.1.4
Using the already existing '--format simple' option in perf-bench
is/should be equivalent to disabling any verbose output. Replace
the local --silent option with it, freeing up the -s option specific
to the futex benchmarks.
Furthermore only show the raw output if used, as it is intended
to make scripting/parsing easier.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/futex-hash.c | 32 +++++++++++++++++++++++---------
tools/perf/bench/futex-requeue.c | 34 +++++++++++++++++++++++-----------
tools/perf/bench/futex-wake.c | 33 ++++++++++++++++++++++-----------
3 files changed, 68 insertions(+), 31 deletions(-)
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index a84206e..14791eb 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -25,7 +25,7 @@ static unsigned int nthreads = 0;
static unsigned int nsecs = 10;
/* amount of futexes per thread */
static unsigned int nfutexes = 1024;
-static bool fshared = false, done = false, silent = false;
+static bool fshared = false, done = false;
struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
@@ -44,7 +44,6 @@ static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
- OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};
@@ -77,7 +76,7 @@ static void *workerfn(void *arg)
*/
ret = futex_wait(&w->futex[i], 1234, NULL,
fshared ? 0 : FUTEX_PRIVATE_FLAG);
- if (!silent &&
+ if (bench_format == BENCH_FORMAT_DEFAULT &&
(!ret || errno != EAGAIN || errno != EWOULDBLOCK))
warn("Non-expected futex return call");
}
@@ -101,9 +100,22 @@ static void print_summary(void)
unsigned long avg = avg_stats(&throughput_stats);
double stddev = stddev_stats(&throughput_stats);
- printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
- !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !bench_format == BENCH_FORMAT_DEFAULT ? "\n" : "",
+ avg, rel_stddev_stats(stddev, avg),
+ (int) runtime.tv_sec);
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%ld\n", avg);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(EXIT_FAILURE);
+
+ }
}
int bench_futex_hash(int argc, const char **argv,
@@ -135,8 +147,10 @@ int bench_futex_hash(int argc, const char **argv,
if (!worker)
goto errmem;
- printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
- getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+ getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+ }
init_stats(&throughput_stats);
pthread_mutex_init(&thread_lock, NULL);
@@ -190,7 +204,7 @@ int bench_futex_hash(int argc, const char **argv,
for (i = 0; i < nthreads; i++) {
unsigned long t = worker[i].ops/runtime.tv_sec;
update_stats(&throughput_stats, t);
- if (!silent) {
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
if (nfutexes == 1)
printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
worker[i].tid, &worker[i].futex[0], t);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 732403b..7b211c1 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -30,7 +30,7 @@ static u_int32_t futex1 = 0, futex2 = 0;
static unsigned int nrequeue = 1;
static pthread_t *worker;
-static bool done = 0, silent = 0;
+static bool done = 0;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats requeuetime_stats, requeued_stats;
@@ -39,7 +39,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
- OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_END()
};
@@ -54,11 +53,22 @@ static void print_summary(void)
double requeuetime_stddev = stddev_stats(&requeuetime_stats);
unsigned int requeued_avg = avg_stats(&requeued_stats);
- printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
- requeued_avg,
- nthreads,
- requeuetime_avg/1e3,
- rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ requeued_avg,
+ nthreads,
+ requeuetime_avg/1e3,
+ rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%.3f\n", requeuetime_avg/1e3);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
}
static void *workerfn(void *arg __maybe_unused)
@@ -127,9 +137,11 @@ int bench_futex_requeue(int argc, const char **argv,
if (!worker)
err(EXIT_FAILURE, "calloc");
- printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
- "%d at a time.\n\n",
- getpid(), nthreads, &futex1, &futex2, nrequeue);
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
+ printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
+ "%d at a time.\n\n",
+ getpid(), nthreads, &futex1, &futex2, nrequeue);
+ }
init_stats(&requeued_stats);
init_stats(&requeuetime_stats);
@@ -169,7 +181,7 @@ int bench_futex_requeue(int argc, const char **argv,
update_stats(&requeued_stats, nrequeued);
update_stats(&requeuetime_stats, runtime.tv_usec);
- if (!silent) {
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
}
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 50022cb..eae6d09 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -31,7 +31,7 @@ static u_int32_t futex1 = 0;
static unsigned int nwakes = 1;
pthread_t *worker;
-static bool done = false, silent = false;
+static bool done = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
@@ -40,7 +40,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
- OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_END()
};
@@ -68,11 +67,22 @@ static void print_summary(void)
double waketime_stddev = stddev_stats(&waketime_stats);
unsigned int wakeup_avg = avg_stats(&wakeup_stats);
- printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
- wakeup_avg,
- nthreads,
- waketime_avg/1e3,
- rel_stddev_stats(waketime_stddev, waketime_avg));
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nthreads,
+ waketime_avg/1e3,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%.4f\n", waketime_avg/1e3);
+ break;
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
}
static void block_threads(pthread_t *w,
@@ -130,9 +140,10 @@ int bench_futex_wake(int argc, const char **argv,
if (!worker)
err(EXIT_FAILURE, "calloc");
- printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
- "waking up %d at a time.\n\n",
- getpid(), nthreads, &futex1, nwakes);
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
+ "waking up %d at a time.\n\n",
+ getpid(), nthreads, &futex1, nwakes);
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
@@ -167,7 +178,7 @@ int bench_futex_wake(int argc, const char **argv,
update_stats(&wakeup_stats, nwoken);
update_stats(&waketime_stats, runtime.tv_usec);
- if (!silent) {
+ if (bench_format == BENCH_FORMAT_DEFAULT) {
printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
}
--
1.8.1.4
-o, --only-prefault Show only the result with page faults before mem*
-n, --no-prefault Show only the result without page faults before mem*
It makes no sense to pass both options together. This applies to both memset and memcpy.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/bench/mem-memcpy.c | 5 +++++
tools/perf/bench/mem-memset.c | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 5ce71d3..e622c3e 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -189,6 +189,11 @@ int bench_mem_memcpy(int argc, const char **argv,
argc = parse_options(argc, argv, options,
bench_mem_memcpy_usage, 0);
+ if (no_prefault && only_prefault) {
+ fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+ return 1;
+ }
+
if (use_cycle)
init_cycle();
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index 9af79d2..2a65468 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -181,6 +181,11 @@ int bench_mem_memset(int argc, const char **argv,
argc = parse_options(argc, argv, options,
bench_mem_memset_usage, 0);
+ if (no_prefault && only_prefault) {
+ fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+ return 1;
+ }
+
if (use_cycle)
init_cycle();
--
1.8.1.4
There are a number of benchmarks that do single runs and
as a result do not really help users gain a general idea
of how the workload performs. So the user must either manually
do multiple runs or just use single bogus results.
This option will enable users to specify the amount of runs
(arbitrarily defaulted to 5) through the '--repeat' option.
Add it to perf-bench instead of implementing it always in
each specific benchmark.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/Documentation/perf-bench.txt | 4 ++++
tools/perf/bench/bench.h | 1 +
tools/perf/builtin-bench.c | 7 +++++++
3 files changed, 12 insertions(+)
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 4464ad7..fd70928 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -16,6 +16,10 @@ This 'perf bench' command is a general framework for benchmark suites.
COMMON OPTIONS
--------------
+-r::
+--repeat=::
+Specify amount of times to repeat the run (default 5).
+
-f::
--format=::
Specify format style.
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index eba4670..3c4dd44 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -43,5 +43,6 @@ extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_UNKNOWN -1
extern int bench_format;
+extern unsigned int bench_repeat;
#endif
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 1e6e777..820da6e 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -104,9 +104,11 @@ static const char *bench_format_str;
/* Output/formatting style, exported to benchmark modules: */
int bench_format = BENCH_FORMAT_DEFAULT;
+unsigned int bench_repeat = 5; /* default number of times to repeat the run */
static const struct option bench_options[] = {
OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
+ OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"),
OPT_END()
};
@@ -226,6 +228,11 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
goto end;
}
+ if (bench_repeat == 0) {
+ printf("Invalid repeat option: Must specify a positive value\n");
+ goto end;
+ }
+
if (argc < 1) {
print_usage();
goto end;
--
1.8.1.4
Hi Davidlohr,
On Mon, 16 Jun 2014 11:14:19 -0700, Davidlohr Bueso wrote:
> There are a number of benchmarks that do single runs and
> as a result does not really help users gain a general idea
> of how the workload performs. So the user must either manually
> do multiple runs or just use single bogus results.
>
> This option will enable users to specify the amount of runs
> (arbitrarily defaulted to 5) through the '--repeat' option.
> Add it to perf-bench instead of implementing it always in
> each specific benchmark.
By adding a top-level option, I think it should be applied to all
benchmarks - but I guess it only supports sched messaging and futex,
right?
Also, it makes benchmarks run 5 times (by default) even if the option
is not given at all; is that your intention?
Thanks,
Namhyung
>
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> tools/perf/Documentation/perf-bench.txt | 4 ++++
> tools/perf/bench/bench.h | 1 +
> tools/perf/builtin-bench.c | 7 +++++++
> 3 files changed, 12 insertions(+)
>
> diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
> index 4464ad7..fd70928 100644
> --- a/tools/perf/Documentation/perf-bench.txt
> +++ b/tools/perf/Documentation/perf-bench.txt
> @@ -16,6 +16,10 @@ This 'perf bench' command is a general framework for benchmark suites.
>
> COMMON OPTIONS
> --------------
> +-r::
> +--repeat=::
> +Specify amount of times to repeat the run (default 5).
> +
> -f::
> --format=::
> Specify format style.
> diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
> index eba4670..3c4dd44 100644
> --- a/tools/perf/bench/bench.h
> +++ b/tools/perf/bench/bench.h
> @@ -43,5 +43,6 @@ extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
> #define BENCH_FORMAT_UNKNOWN -1
>
> extern int bench_format;
> +extern unsigned int bench_repeat;
>
> #endif
> diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
> index 1e6e777..820da6e 100644
> --- a/tools/perf/builtin-bench.c
> +++ b/tools/perf/builtin-bench.c
> @@ -104,9 +104,11 @@ static const char *bench_format_str;
>
> /* Output/formatting style, exported to benchmark modules: */
> int bench_format = BENCH_FORMAT_DEFAULT;
> +unsigned int bench_repeat = 5; /* default number of times to repeat the run */
>
> static const struct option bench_options[] = {
> OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
> + OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"),
> OPT_END()
> };
>
> @@ -226,6 +228,11 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
> goto end;
> }
>
> + if (bench_repeat == 0) {
> + printf("Invalid repeat option: Must specify a positive value\n");
> + goto end;
> + }
> +
> if (argc < 1) {
> print_usage();
> goto end;
On Mon, 16 Jun 2014 11:14:22 -0700, Davidlohr Bueso wrote:
> Explicitly free the thread array ('pth_tab').
It seems like sender/receiver contexts were also leaked.
Thanks,
Namhyung
Hi Namhyung,
On Thu, 2014-06-19 at 15:14 +0900, Namhyung Kim wrote:
> Hi Davidlohr,
>
> On Mon, 16 Jun 2014 11:14:19 -0700, Davidlohr Bueso wrote:
> > There are a number of benchmarks that do single runs and
> > as a result does not really help users gain a general idea
> > of how the workload performs. So the user must either manually
> > do multiple runs or just use single bogus results.
> >
> > This option will enable users to specify the amount of runs
> > (arbitrarily defaulted to 5) through the '--repeat' option.
> > Add it to perf-bench instead of implementing it always in
> > each specific benchmark.
>
> By adding a top-level option, I think it should be applied to all
> benchmaks - but I guess it only supports sched messaging and futex,
> right?
Yes, for now only those. While there is opportunity for others to use it
as well (perhaps sched-pipe & memcpy/memset), I don't think *all*
benchmarks need multiple runs, e.g. numa.
> Also it makes benchmarks to run 5 times (by default) even if the option
> is not given at all, is that your intention?
Yes. 5x seemed prudent for the time being. If the option isn't supported
by some benchmark, then it is safely ignored.
Thanks,
Davidlohr
Em Mon, Jun 16, 2014 at 11:14:24AM -0700, Davidlohr Bueso escreveu:
> Using the already existing '--format simple' option in perf-bench
> is/should be equivalent to disabling any verbose output. Replace
> it and free up the -s option specific to the futex benchmark.
Isn't this much longer?
I haven't seen any patch in this series wanting to use -s.
Ingo, are you Ok with this?
I.e. I'm just trying to be careful when changing existing cmd line args,
perhaps someone is used to this, who knows, and at least for me,
--silent is way, way more clear than '--format simple', that says
nothing to me.
- Arnaldo
> Furthermore only show the raw output if used, as it is intended
> to make scripting/parsing easier.
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> tools/perf/bench/futex-hash.c | 32 +++++++++++++++++++++++---------
> tools/perf/bench/futex-requeue.c | 34 +++++++++++++++++++++++-----------
> tools/perf/bench/futex-wake.c | 33 ++++++++++++++++++++++-----------
> 3 files changed, 68 insertions(+), 31 deletions(-)
>
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index a84206e..14791eb 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -25,7 +25,7 @@ static unsigned int nthreads = 0;
> static unsigned int nsecs = 10;
> /* amount of futexes per thread */
> static unsigned int nfutexes = 1024;
> -static bool fshared = false, done = false, silent = false;
> +static bool fshared = false, done = false;
>
> struct timeval start, end, runtime;
> static pthread_mutex_t thread_lock;
> @@ -44,7 +44,6 @@ static const struct option options[] = {
> OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
> OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
> OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
> - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
> OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
> OPT_END()
> };
> @@ -77,7 +76,7 @@ static void *workerfn(void *arg)
> */
> ret = futex_wait(&w->futex[i], 1234, NULL,
> fshared ? 0 : FUTEX_PRIVATE_FLAG);
> - if (!silent &&
> + if (bench_format == BENCH_FORMAT_DEFAULT &&
> (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
> warn("Non-expected futex return call");
> }
> @@ -101,9 +100,22 @@ static void print_summary(void)
> unsigned long avg = avg_stats(&throughput_stats);
> double stddev = stddev_stats(&throughput_stats);
>
> - printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
> - !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
> - (int) runtime.tv_sec);
> + switch (bench_format) {
> + case BENCH_FORMAT_DEFAULT:
> + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
> + !bench_format == BENCH_FORMAT_DEFAULT ? "\n" : "",
> + avg, rel_stddev_stats(stddev, avg),
> + (int) runtime.tv_sec);
> + break;
> + case BENCH_FORMAT_SIMPLE:
> + printf("%ld\n", avg);
> + break;
> + default:
> + /* reaching here is something disaster */
> + fprintf(stderr, "Unknown format:%d\n", bench_format);
> + exit(EXIT_FAILURE);
> +
> + }
> }
>
> int bench_futex_hash(int argc, const char **argv,
> @@ -135,8 +147,10 @@ int bench_futex_hash(int argc, const char **argv,
> if (!worker)
> goto errmem;
>
> - printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
> - getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
> + if (bench_format == BENCH_FORMAT_DEFAULT) {
> + printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
> + getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
> + }
>
> init_stats(&throughput_stats);
> pthread_mutex_init(&thread_lock, NULL);
> @@ -190,7 +204,7 @@ int bench_futex_hash(int argc, const char **argv,
> for (i = 0; i < nthreads; i++) {
> unsigned long t = worker[i].ops/runtime.tv_sec;
> update_stats(&throughput_stats, t);
> - if (!silent) {
> + if (bench_format == BENCH_FORMAT_DEFAULT) {
> if (nfutexes == 1)
> printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
> worker[i].tid, &worker[i].futex[0], t);
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 732403b..7b211c1 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -30,7 +30,7 @@ static u_int32_t futex1 = 0, futex2 = 0;
> static unsigned int nrequeue = 1;
>
> static pthread_t *worker;
> -static bool done = 0, silent = 0;
> +static bool done = 0;
> static pthread_mutex_t thread_lock;
> static pthread_cond_t thread_parent, thread_worker;
> static struct stats requeuetime_stats, requeued_stats;
> @@ -39,7 +39,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
> static const struct option options[] = {
> OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
> OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
> - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
> OPT_END()
> };
>
> @@ -54,11 +53,22 @@ static void print_summary(void)
> double requeuetime_stddev = stddev_stats(&requeuetime_stats);
> unsigned int requeued_avg = avg_stats(&requeued_stats);
>
> - printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
> - requeued_avg,
> - nthreads,
> - requeuetime_avg/1e3,
> - rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
> + switch (bench_format) {
> + case BENCH_FORMAT_DEFAULT:
> + printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
> + requeued_avg,
> + nthreads,
> + requeuetime_avg/1e3,
> + rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
> + break;
> + case BENCH_FORMAT_SIMPLE:
> + printf("%.3f\n", requeuetime_avg/1e3);
> + break;
> + default:
> + /* reaching here is something disaster */
> + fprintf(stderr, "Unknown format:%d\n", bench_format);
> + exit(EXIT_FAILURE);
> + }
> }
>
> static void *workerfn(void *arg __maybe_unused)
> @@ -127,9 +137,11 @@ int bench_futex_requeue(int argc, const char **argv,
> if (!worker)
> err(EXIT_FAILURE, "calloc");
>
> - printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
> - "%d at a time.\n\n",
> - getpid(), nthreads, &futex1, &futex2, nrequeue);
> + if (bench_format == BENCH_FORMAT_DEFAULT) {
> + printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
> + "%d at a time.\n\n",
> + getpid(), nthreads, &futex1, &futex2, nrequeue);
> + }
>
> init_stats(&requeued_stats);
> init_stats(&requeuetime_stats);
> @@ -169,7 +181,7 @@ int bench_futex_requeue(int argc, const char **argv,
> update_stats(&requeued_stats, nrequeued);
> update_stats(&requeuetime_stats, runtime.tv_usec);
>
> - if (!silent) {
> + if (bench_format == BENCH_FORMAT_DEFAULT) {
> printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
> j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
> }
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index 50022cb..eae6d09 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -31,7 +31,7 @@ static u_int32_t futex1 = 0;
> static unsigned int nwakes = 1;
>
> pthread_t *worker;
> -static bool done = false, silent = false;
> +static bool done = false;
> static pthread_mutex_t thread_lock;
> static pthread_cond_t thread_parent, thread_worker;
> static struct stats waketime_stats, wakeup_stats;
> @@ -40,7 +40,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
> static const struct option options[] = {
> OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
> OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
> - OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
> OPT_END()
> };
>
> @@ -68,11 +67,22 @@ static void print_summary(void)
> double waketime_stddev = stddev_stats(&waketime_stats);
> unsigned int wakeup_avg = avg_stats(&wakeup_stats);
>
> - printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
> - wakeup_avg,
> - nthreads,
> - waketime_avg/1e3,
> - rel_stddev_stats(waketime_stddev, waketime_avg));
> + switch (bench_format) {
> + case BENCH_FORMAT_DEFAULT:
> + printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
> + wakeup_avg,
> + nthreads,
> + waketime_avg/1e3,
> + rel_stddev_stats(waketime_stddev, waketime_avg));
> + break;
> + case BENCH_FORMAT_SIMPLE:
> + printf("%.4f\n", waketime_avg/1e3);
> + break;
> + default:
> + /* reaching here is something disaster */
> + fprintf(stderr, "Unknown format:%d\n", bench_format);
> + exit(EXIT_FAILURE);
> + }
> }
>
> static void block_threads(pthread_t *w,
> @@ -130,9 +140,10 @@ int bench_futex_wake(int argc, const char **argv,
> if (!worker)
> err(EXIT_FAILURE, "calloc");
>
> - printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
> - "waking up %d at a time.\n\n",
> - getpid(), nthreads, &futex1, nwakes);
> + if (bench_format == BENCH_FORMAT_DEFAULT)
> + printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
> + "waking up %d at a time.\n\n",
> + getpid(), nthreads, &futex1, nwakes);
>
> init_stats(&wakeup_stats);
> init_stats(&waketime_stats);
> @@ -167,7 +178,7 @@ int bench_futex_wake(int argc, const char **argv,
> update_stats(&wakeup_stats, nwoken);
> update_stats(&waketime_stats, runtime.tv_usec);
>
> - if (!silent) {
> + if (bench_format == BENCH_FORMAT_DEFAULT) {
> printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
> j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
> }
> --
> 1.8.1.4
Em Mon, Jun 16, 2014 at 11:14:27AM -0700, Davidlohr Bueso escreveu:
> Unlike futex-hash, requeuing and wakeup benchmarks do not support
> shared futexes, limiting the usefulness of the programs. Correct
> this, and allow using the local -S parameter. The default remains
> using private futexes.
Also isn't this benchmark related to something Darren did? Just checking
why he isn't in the CC list, Darren, does this all sound fine to you?
- Arnaldo
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> tools/perf/bench/futex-hash.c | 7 +++++--
> tools/perf/bench/futex-requeue.c | 22 ++++++++++++++--------
> tools/perf/bench/futex-wake.c | 15 ++++++++++-----
> 3 files changed, 29 insertions(+), 15 deletions(-)
>
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index 14791eb..5175171 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -26,6 +26,7 @@ static unsigned int nsecs = 10;
> /* amount of futexes per thread */
> static unsigned int nfutexes = 1024;
> static bool fshared = false, done = false;
> +static int futex_flag = 0;
>
> struct timeval start, end, runtime;
> static pthread_mutex_t thread_lock;
> @@ -74,8 +75,7 @@ static void *workerfn(void *arg)
> * such as internal waitqueue handling, thus enlarging
> * the critical region protected by hb->lock.
> */
> - ret = futex_wait(&w->futex[i], 1234, NULL,
> - fshared ? 0 : FUTEX_PRIVATE_FLAG);
> + ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
> if (bench_format == BENCH_FORMAT_DEFAULT &&
> (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
> warn("Non-expected futex return call");
> @@ -147,6 +147,9 @@ int bench_futex_hash(int argc, const char **argv,
> if (!worker)
> goto errmem;
>
> + if (!fshared)
> + futex_flag = FUTEX_PRIVATE_FLAG;
> +
> if (bench_format == BENCH_FORMAT_DEFAULT) {
> printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
> getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index 7b211c1..197537b 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -30,15 +30,17 @@ static u_int32_t futex1 = 0, futex2 = 0;
> static unsigned int nrequeue = 1;
>
> static pthread_t *worker;
> -static bool done = 0;
> +static bool done = false, fshared = false;
> static pthread_mutex_t thread_lock;
> static pthread_cond_t thread_parent, thread_worker;
> static struct stats requeuetime_stats, requeued_stats;
> static unsigned int ncpus, threads_starting, nthreads = 0;
> +static int futex_flag = 0;
>
> static const struct option options[] = {
> OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
> OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
> + OPT_BOOLEAN('S', "shared", &fshared, "Use shared futexes instead of private ones"),
> OPT_END()
> };
>
> @@ -80,7 +82,7 @@ static void *workerfn(void *arg __maybe_unused)
> pthread_cond_wait(&thread_worker, &thread_lock);
> pthread_mutex_unlock(&thread_lock);
>
> - futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
> + futex_wait(&futex1, 0, NULL, futex_flag);
> return NULL;
> }
>
> @@ -137,10 +139,13 @@ int bench_futex_requeue(int argc, const char **argv,
> if (!worker)
> err(EXIT_FAILURE, "calloc");
>
> + if (!fshared)
> + futex_flag = FUTEX_PRIVATE_FLAG;
> +
> if (bench_format == BENCH_FORMAT_DEFAULT) {
> - printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
> - "%d at a time.\n\n",
> - getpid(), nthreads, &futex1, &futex2, nrequeue);
> + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] futex %p to %p), "
> + "%d at a time.\n\n", getpid(), nthreads,
> + fshared ? "shared":"private", &futex1, &futex2, nrequeue);
> }
>
> init_stats(&requeued_stats);
> @@ -173,8 +178,9 @@ int bench_futex_requeue(int argc, const char **argv,
> * Do not wakeup any tasks blocked on futex1, allowing
> * us to really measure futex_wait functionality.
> */
> - futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue,
> - FUTEX_PRIVATE_FLAG);
> + futex_cmp_requeue(&futex1, 0, &futex2, 0,
> + nrequeue, futex_flag);
> +
> gettimeofday(&end, NULL);
> timersub(&end, &start, &runtime);
>
> @@ -187,7 +193,7 @@ int bench_futex_requeue(int argc, const char **argv,
> }
>
> /* everybody should be blocked on futex2, wake'em up */
> - nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG);
> + nrequeued = futex_wake(&futex2, nthreads, futex_flag);
> if (nthreads != nrequeued)
> warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
>
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index eae6d09..08f62eb 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -31,15 +31,17 @@ static u_int32_t futex1 = 0;
> static unsigned int nwakes = 1;
>
> pthread_t *worker;
> -static bool done = false;
> +static bool done = false, fshared = false;
> static pthread_mutex_t thread_lock;
> static pthread_cond_t thread_parent, thread_worker;
> static struct stats waketime_stats, wakeup_stats;
> static unsigned int ncpus, threads_starting, nthreads = 0;
> +static int futex_flag = 0;
>
> static const struct option options[] = {
> OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
> OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
> + OPT_BOOLEAN('S', "shared", &fshared, "Use shared futexes instead of private ones"),
> OPT_END()
> };
>
> @@ -57,7 +59,7 @@ static void *workerfn(void *arg __maybe_unused)
> pthread_cond_wait(&thread_worker, &thread_lock);
> pthread_mutex_unlock(&thread_lock);
>
> - futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
> + futex_wait(&futex1, 0, NULL, futex_flag);
> return NULL;
> }
>
> @@ -140,10 +142,13 @@ int bench_futex_wake(int argc, const char **argv,
> if (!worker)
> err(EXIT_FAILURE, "calloc");
>
> + if (!fshared)
> + futex_flag = FUTEX_PRIVATE_FLAG;
> +
> if (bench_format == BENCH_FORMAT_DEFAULT)
> - printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
> + printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
> "waking up %d at a time.\n\n",
> - getpid(), nthreads, &futex1, nwakes);
> + getpid(), nthreads, fshared ? "shared":"private", &futex1, nwakes);
>
> init_stats(&wakeup_stats);
> init_stats(&waketime_stats);
> @@ -171,7 +176,7 @@ int bench_futex_wake(int argc, const char **argv,
> /* Ok, all threads are patiently blocked, start waking folks up */
> gettimeofday(&start, NULL);
> while (nwoken != nthreads)
> - nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
> + nwoken += futex_wake(&futex1, nwakes, futex_flag);
> gettimeofday(&end, NULL);
> timersub(&end, &start, &runtime);
>
> --
> 1.8.1.4
On Thu, 2014-06-19 at 13:41 -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Jun 16, 2014 at 11:14:27AM -0700, Davidlohr Bueso escreveu:
> > Unlike futex-hash, requeuing and wakeup benchmarks do not support
> > shared futexes, limiting the usefulness of the programs. Correct
> > this, and allow using the local -S parameter. The default remains
> > using private futexes.
>
> Also isn't this benchmark related to something Darren did? Just checking
> why he isn't in the CC list, Darren, does this all sound fine to you?
Darren is well aware of these benchmarks, and we've discussed the topic
in the past. This change doesn't conflict with what he maintains in the
futextest suite.
Thanks,
Davidlohr
On Thu, 2014-06-19 at 13:38 -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Jun 16, 2014 at 11:14:24AM -0700, Davidlohr Bueso escreveu:
> > Using the already existing '--format simple' option in perf-bench
> > is/should be equivalent to disabling any verbose output. Replace
> > it and free up the -s option specific to the futex benchmark.
>
> Isn't this much longer?
It is, and to be honest I detest the whole --format option. But it's
already there and I really want to make use of global parameters in
order to recycle options that are benchmark-specific. What I'd really
like is to replace it with --silent and just have perf spit out the raw
data (for scripting). But that would be too disruptive to users, imho.
Does it matter? I dunno.
> I haven't seen any patch in this series wanting to use -s.
>
> Ingo, are you Ok with this?
>
> I.e. I'm just trying to be careful when changing existing cmd line args,
> perhaps someone is used to this, who knows, and at least for me,
> --silent is way, way more clear than '--format simple', that says
> nothing to me.
If we can get away with it, then let's.
Thanks,
Davidlohr
Em Thu, Jun 19, 2014 at 09:43:49AM -0700, Davidlohr Bueso escreveu:
> On Thu, 2014-06-19 at 13:41 -0300, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Jun 16, 2014 at 11:14:27AM -0700, Davidlohr Bueso escreveu:
> > > Unlike futex-hash, requeuing and wakeup benchmarks do not support
> > > shared futexes, limiting the usefulness of the programs. Correct
> > > this, and allow using the local -S parameter. The default remains
> > > using private futexes.
> >
> > Also isn't this benchmark related to something Darren did? Just checking
> > why he isn't in the CC list, Darren, does this all sound fine to you?
>
> Darren is well aware of these benchmarks, and we've discussed the topic
> in the past. This change doesn't conflict with what he maintains in the
> futextest suite.
Ok, thanks, just checking :-)
- Arnaldo
Em Mon, Jun 16, 2014 at 11:14:20AM -0700, Davidlohr Bueso escreveu:
> Instead of printing sec and usec individually, simplify
> into a 'runtime' variable to later use accordingly. This
> is particularly helpful when later adding multiple runs
> and collecting statistics.
>
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> tools/perf/bench/sched-messaging.c | 13 +++++--------
> 1 file changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
> index cc1190a..df0828a 100644
> --- a/tools/perf/bench/sched-messaging.c
> +++ b/tools/perf/bench/sched-messaging.c
> @@ -269,9 +269,9 @@ static const char * const bench_sched_message_usage[] = {
> int bench_sched_messaging(int argc, const char **argv,
> const char *prefix __maybe_unused)
> {
> - unsigned int i, total_children;
> + unsigned int i, total_children, num_fds = 20;
> struct timeval start, stop, diff;
> - unsigned int num_fds = 20;
> + unsigned long runtime;
> int readyfds[2], wakefds[2];
> char dummy;
> pthread_t *pth_tab;
> @@ -307,8 +307,8 @@ int bench_sched_messaging(int argc, const char **argv,
> reap_worker(pth_tab[i]);
>
> gettimeofday(&stop, NULL);
> -
> timersub(&stop, &start, &diff);
> + runtime = (diff.tv_sec * 1e3) + (diff.tv_usec/1e3);
So if you really want to change this into something more compact, why
not declare runtime as float and do the div here, so that you don't have
to do it every time you need to format it?
But perhaps because in the kernel we avoid doing floating point, I found
this code unusual; I was expecting things like USEC_PER_MSEC, and no
floating-point operations being used, i.e. as before :-\
Anyway, I took most of the patches in this series, will push what I have
after some more testing.
- Arnaldo
>
> switch (bench_format) {
> case BENCH_FORMAT_DEFAULT:
> @@ -317,13 +317,10 @@ int bench_sched_messaging(int argc, const char **argv,
> printf("# %d groups == %d %s run\n\n",
> num_groups, num_groups * 2 * num_fds,
> thread_mode ? "threads" : "processes");
> - printf(" %14s: %lu.%03lu [sec]\n", "Total time",
> - diff.tv_sec,
> - (unsigned long) (diff.tv_usec/1000));
> + printf(" %14s: %.3f [sec]\n", "Total time", runtime/1e3);
> break;
> case BENCH_FORMAT_SIMPLE:
> - printf("%lu.%03lu\n", diff.tv_sec,
> - (unsigned long) (diff.tv_usec/1000));
> + printf("%.3f\n", runtime/1e3);
> break;
> default:
> /* reaching here is something disaster */
> --
> 1.8.1.4
On Thu, 2014-06-19 at 14:05 -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Jun 19, 2014 at 09:43:49AM -0700, Davidlohr Bueso escreveu:
> > On Thu, 2014-06-19 at 13:41 -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Mon, Jun 16, 2014 at 11:14:27AM -0700, Davidlohr Bueso escreveu:
> > > > Unlike futex-hash, requeuing and wakeup benchmarks do not support
> > > > shared futexes, limiting the usefulness of the programs. Correct
> > > > this, and allow using the local -S parameter. The default remains
> > > > using private futexes.
> > >
> > > Also isn't this benchmark related to something Darren did? Just checking
> > > why he isn't in the CC list, Darren, does this all sound fine to you?
> >
> > Darren is well aware of these benchmarks, and we've discussed the topic
> > in the past. This change doesn't conflict with what he maintains in the
> > futextest suite.
>
> Ok, thanks, just checking :-)
Right. The plan, for the record, is to augment functional tests in
futextest and move all performance related tests to perf. Davidlohr's
tests are independent from those in futextest. I haven't done an
exhaustive comparison to see if there is overlap, but since he got to
perf first, we'll determine which futextest/performance tests add value
and add those, and deep six the rest.
The functional tests may then be moved to linux/tools and futextest will
then be obsolete.
--
Darren Hart
Intel Open Source Technology Center
Hi Davidlohr,
On Thu, Jun 19, 2014 at 11:45 AM, Davidlohr Bueso <[email protected]> wrote:
> Hi Namhyung,
>
> On Thu, 2014-06-19 at 15:14 +0900, Namhyung Kim wrote:
>> By adding a top-level option, I think it should be applied to all
>> benchmaks - but I guess it only supports sched messaging and futex,
>> right?
>
> Yes, for now only those. While there is opportunity for others to use it
> as well (perhaps shed-pipe & memcpy/memset), I don't think *all*
> benchmarks need multiple runs, ie: numa.
Hmm.. but it'd confuse users if one runs the numa benchmark
with the -r 5 option but it only does a single run..
Thanks,
Namhyung
On Thu, 2014-06-19 at 23:51 +0000, Namhyung Kim wrote:
> Hi Davidlohr,
>
> On Thu, Jun 19, 2014 at 11:45 AM, Davidlohr Bueso <[email protected]> wrote:
> > Hi Namhyung,
> >
> > On Thu, 2014-06-19 at 15:14 +0900, Namhyung Kim wrote:
> >> By adding a top-level option, I think it should be applied to all
> >> benchmaks - but I guess it only supports sched messaging and futex,
> >> right?
> >
> > Yes, for now only those. While there is opportunity for others to use it
> > as well (perhaps shed-pipe & memcpy/memset), I don't think *all*
> > benchmarks need multiple runs, ie: numa.
>
> Hmm.. but it'd make users confusing if one runs the numa benchmark
> with -r 5 option but it only do a single run..
Yeah, it crossed my mind. For that to be addressed, we would have to
come up with a way to determine if the argument was passed, and just
inform the user that it is not [currently(?)] supported. Some
alternatives would be to (i) explicitly document it, and/or (ii) print
out the amount of runs that will be made and if that option is
supported. All in all I think we need a better infrastructure for such
things.
I feel perf-bench suffers fundamental design issues and tries to cover
too much.
Thanks,
Davidlohr
Hi Davidlohr,
On Thu, 19 Jun 2014 20:03:57 -0700, Davidlohr Bueso wrote:
> On Thu, 2014-06-19 at 23:51 +0000, Namhyung Kim wrote:
>> Hi Davidlohr,
>>
>> On Thu, Jun 19, 2014 at 11:45 AM, Davidlohr Bueso <[email protected]> wrote:
>> > Hi Namhyung,
>> >
>> > On Thu, 2014-06-19 at 15:14 +0900, Namhyung Kim wrote:
>> >> By adding a top-level option, I think it should be applied to all
>> >> benchmaks - but I guess it only supports sched messaging and futex,
>> >> right?
>> >
>> > Yes, for now only those. While there is opportunity for others to use it
>> > as well (perhaps shed-pipe & memcpy/memset), I don't think *all*
>> > benchmarks need multiple runs, ie: numa.
>>
>> Hmm.. but it'd make users confusing if one runs the numa benchmark
>> with -r 5 option but it only do a single run..
>
> Yeah, it crossed my mind. For that to be addressed, we would have to
> come up with a way to determine if the argument was passed, and just
> inform the user that it is not [currently(?)] supported. Some
> alternatives would be to (i) explicitly document it, and/or (ii) print
> out the amount of runs that will be made and if that option is
> supported. All in all I think we need a better infrastructure for such
> things.
I don't think we need to stop a user who really wants to run a
benchmark multiple times. So how about supporting it for all benchmarks
but providing sane per-benchmark defaults (i.e. 1 for numa, 5 for sched)?
Thanks,
Namhyung
Commit-ID: b094c99e8e284cff839400a3b61fda1fa53962fc
Gitweb: http://git.kernel.org/tip/b094c99e8e284cff839400a3b61fda1fa53962fc
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Mon, 16 Jun 2014 11:14:22 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Thu, 19 Jun 2014 16:13:15 -0300
perf bench sched-messaging: Plug memleak
Explicitly free the thread array ('pth_tab').
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/bench/sched-messaging.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index cc1190a..fc4fe91 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -332,5 +332,7 @@ int bench_sched_messaging(int argc, const char **argv,
break;
}
+ free(pth_tab);
+
return 0;
}
Commit-ID: b6f0629a94f7ed6089560be7f0561be19f934fc4
Gitweb: http://git.kernel.org/tip/b6f0629a94f7ed6089560be7f0561be19f934fc4
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Mon, 16 Jun 2014 11:14:19 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Thu, 19 Jun 2014 16:13:15 -0300
perf bench: Add --repeat option
There are a number of benchmarks that do single runs and as a result
do not really help users gain a general idea of how the workload
performs. So the user must either manually do multiple runs or just use
single bogus results.
This option will enable users to specify the amount of runs (arbitrarily
defaulted to 10, to use the existing benchmarks default) through the
'--repeat' option. Add it to perf-bench instead of implementing it
always in each specific benchmark.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Kept the existing default of 10, changing it to something else should
be done in a separate patch ]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-bench.txt | 4 ++++
tools/perf/bench/bench.h | 1 +
tools/perf/builtin-bench.c | 7 +++++++
3 files changed, 12 insertions(+)
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 4464ad7..f6480cb 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -16,6 +16,10 @@ This 'perf bench' command is a general framework for benchmark suites.
COMMON OPTIONS
--------------
+-r::
+--repeat=::
+Specify amount of times to repeat the run (default 10).
+
-f::
--format=::
Specify format style.
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index eba4670..3c4dd44 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -43,5 +43,6 @@ extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_UNKNOWN -1
extern int bench_format;
+extern unsigned int bench_repeat;
#endif
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 1e6e777..b9a56fa 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -104,9 +104,11 @@ static const char *bench_format_str;
/* Output/formatting style, exported to benchmark modules: */
int bench_format = BENCH_FORMAT_DEFAULT;
+unsigned int bench_repeat = 10; /* default number of times to repeat the run */
static const struct option bench_options[] = {
OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
+ OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"),
OPT_END()
};
@@ -226,6 +228,11 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
goto end;
}
+ if (bench_repeat == 0) {
+ printf("Invalid repeat option: Must specify a positive value\n");
+ goto end;
+ }
+
if (argc < 1) {
print_usage();
goto end;
Commit-ID: d9de84afd1f3a464135abe2b26e66aa86be5af8d
Gitweb: http://git.kernel.org/tip/d9de84afd1f3a464135abe2b26e66aa86be5af8d
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Mon, 16 Jun 2014 11:14:23 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Thu, 19 Jun 2014 16:13:16 -0300
perf bench futex: Use global --repeat option
This option is available through perf-bench, use it instead and free the
local option.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/bench/futex-requeue.c | 10 +---------
tools/perf/bench/futex-wake.c | 12 ++----------
2 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a1625587..732403b 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -29,13 +29,6 @@ static u_int32_t futex1 = 0, futex2 = 0;
*/
static unsigned int nrequeue = 1;
-/*
- * There can be significant variance from run to run,
- * the more repeats, the more exact the overall avg and
- * the better idea of the futex latency.
- */
-static unsigned int repeat = 10;
-
static pthread_t *worker;
static bool done = 0, silent = 0;
static pthread_mutex_t thread_lock;
@@ -46,7 +39,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
- OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_END()
};
@@ -146,7 +138,7 @@ int bench_futex_requeue(int argc, const char **argv,
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);
- for (j = 0; j < repeat && !done; j++) {
+ for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nrequeued = 0;
struct timeval start, end, runtime;
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index d096169..50022cb 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -30,15 +30,8 @@ static u_int32_t futex1 = 0;
*/
static unsigned int nwakes = 1;
-/*
- * There can be significant variance from run to run,
- * the more repeats, the more exact the overall avg and
- * the better idea of the futex latency.
- */
-static unsigned int repeat = 10;
-
pthread_t *worker;
-static bool done = 0, silent = 0;
+static bool done = false, silent = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
@@ -47,7 +40,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0;
static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
- OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_END()
};
@@ -149,7 +141,7 @@ int bench_futex_wake(int argc, const char **argv,
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);
- for (j = 0; j < repeat && !done; j++) {
+ for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nwoken = 0;
struct timeval start, end, runtime;
Commit-ID: 424e9634887842ac59c1d06d3264aaeb18853c0b
Gitweb: http://git.kernel.org/tip/424e9634887842ac59c1d06d3264aaeb18853c0b
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Mon, 16 Jun 2014 11:14:25 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Thu, 19 Jun 2014 16:13:16 -0300
perf bench mem: The -o and -n options are mutually exclusive
-o, --only-prefault Show only the result with page faults before mem*
-n, --no-prefault Show only the result without page faults before mem*
It makes no sense to use them together. Applies to both memset and memcpy.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/bench/mem-memcpy.c | 5 +++++
tools/perf/bench/mem-memset.c | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 5ce71d3..e622c3e 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -189,6 +189,11 @@ int bench_mem_memcpy(int argc, const char **argv,
argc = parse_options(argc, argv, options,
bench_mem_memcpy_usage, 0);
+ if (no_prefault && only_prefault) {
+ fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+ return 1;
+ }
+
if (use_cycle)
init_cycle();
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index 9af79d2..2a65468 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -181,6 +181,11 @@ int bench_mem_memset(int argc, const char **argv,
argc = parse_options(argc, argv, options,
bench_mem_memset_usage, 0);
+ if (no_prefault && only_prefault) {
+ fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+ return 1;
+ }
+
if (use_cycle)
init_cycle();
Commit-ID: ecdac96899e3db3f428e4d2e978f25e3f8d35a6c
Gitweb: http://git.kernel.org/tip/ecdac96899e3db3f428e4d2e978f25e3f8d35a6c
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Mon, 16 Jun 2014 11:14:26 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Thu, 19 Jun 2014 16:13:17 -0300
perf bench sched-messaging: Drop barf()
Instead of reinventing the wheel, we can use err(3) when dealing with
fatal errors. Exit code is now always EXIT_FAILURE (1).
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/bench/sched-messaging.c | 45 ++++++++++++++++----------------------
1 file changed, 19 insertions(+), 26 deletions(-)
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index fc4fe91..52a5659 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -28,6 +28,7 @@
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
+#include <err.h>
#define DATASIZE 100
@@ -50,12 +51,6 @@ struct receiver_context {
int wakefd;
};
-static void barf(const char *msg)
-{
- fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
- exit(1);
-}
-
static void fdpair(int fds[2])
{
if (use_pipes) {
@@ -66,7 +61,7 @@ static void fdpair(int fds[2])
return;
}
- barf(use_pipes ? "pipe()" : "socketpair()");
+ err(EXIT_FAILURE, use_pipes ? "pipe()" : "socketpair()");
}
/* Block until we're ready to go */
@@ -77,11 +72,11 @@ static void ready(int ready_out, int wakefd)
/* Tell them we're ready. */
if (write(ready_out, &dummy, 1) != 1)
- barf("CLIENT: ready write");
+ err(EXIT_FAILURE, "CLIENT: ready write");
/* Wait for "GO" signal */
if (poll(&pollfd, 1, -1) != 1)
- barf("poll");
+ err(EXIT_FAILURE, "poll");
}
/* Sender sprays loops messages down each file descriptor */
@@ -101,7 +96,7 @@ again:
ret = write(ctx->out_fds[j], data + done,
sizeof(data)-done);
if (ret < 0)
- barf("SENDER: write");
+ err(EXIT_FAILURE, "SENDER: write");
done += ret;
if (done < DATASIZE)
goto again;
@@ -131,7 +126,7 @@ static void *receiver(struct receiver_context* ctx)
again:
ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
if (ret < 0)
- barf("SERVER: read");
+ err(EXIT_FAILURE, "SERVER: read");
done += ret;
if (done < DATASIZE)
goto again;
@@ -144,14 +139,14 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
{
pthread_attr_t attr;
pthread_t childid;
- int err;
+ int ret;
if (!thread_mode) {
/* process mode */
/* Fork the receiver. */
switch (fork()) {
case -1:
- barf("fork()");
+ err(EXIT_FAILURE, "fork()");
break;
case 0:
(*func) (ctx);
@@ -165,19 +160,17 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
}
if (pthread_attr_init(&attr) != 0)
- barf("pthread_attr_init:");
+ err(EXIT_FAILURE, "pthread_attr_init:");
#ifndef __ia64__
if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
- barf("pthread_attr_setstacksize");
+ err(EXIT_FAILURE, "pthread_attr_setstacksize");
#endif
- err = pthread_create(&childid, &attr, func, ctx);
- if (err != 0) {
- fprintf(stderr, "pthread_create failed: %s (%d)\n",
- strerror(err), err);
- exit(-1);
- }
+ ret = pthread_create(&childid, &attr, func, ctx);
+ if (ret != 0)
+ err(EXIT_FAILURE, "pthread_create failed");
+
return childid;
}
@@ -207,14 +200,14 @@ static unsigned int group(pthread_t *pth,
+ num_fds * sizeof(int));
if (!snd_ctx)
- barf("malloc()");
+ err(EXIT_FAILURE, "malloc()");
for (i = 0; i < num_fds; i++) {
int fds[2];
struct receiver_context *ctx = malloc(sizeof(*ctx));
if (!ctx)
- barf("malloc()");
+ err(EXIT_FAILURE, "malloc()");
/* Create the pipe between client and server */
@@ -281,7 +274,7 @@ int bench_sched_messaging(int argc, const char **argv,
pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
if (!pth_tab)
- barf("main:malloc()");
+ err(EXIT_FAILURE, "main:malloc()");
fdpair(readyfds);
fdpair(wakefds);
@@ -294,13 +287,13 @@ int bench_sched_messaging(int argc, const char **argv,
/* Wait for everyone to be ready */
for (i = 0; i < total_children; i++)
if (read(readyfds[0], &dummy, 1) != 1)
- barf("Reading for readyfds");
+ err(EXIT_FAILURE, "Reading for readyfds");
gettimeofday(&start, NULL);
/* Kick them off */
if (write(wakefds[1], &dummy, 1) != 1)
- barf("Writing to start them");
+ err(EXIT_FAILURE, "Writing to start them");
/* Reap them all */
for (i = 0; i < total_children; i++)
On Thu, 2014-06-19 at 14:05 -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Jun 19, 2014 at 09:43:49AM -0700, Davidlohr Bueso escreveu:
> > On Thu, 2014-06-19 at 13:41 -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Mon, Jun 16, 2014 at 11:14:27AM -0700, Davidlohr Bueso escreveu:
> > > > Unlike futex-hash, requeuing and wakeup benchmarks do not support
> > > > shared futexes, limiting the usefulness of the programs. Correct
> > > > this, and allow using the local -S parameter. The default remains
> > > > using private futexes.
> > >
> > > Also isn't this benchmark related to something Darren did? Just checking
> > > why he isn't in the CC list, Darren, does this all sound fine to you?
> >
> > Darren is well aware of these benchmarks, and we've discussed the topic
> > in the past. This change doesn't conflict with what he maintains in the
> > futextest suite.
>
> Ok, thanks, just checking :-)
Hi Arnaldo, curious as to why this one wasn't picked up.
Thanks,
Davidlohr