I found that the UNHALTED_CORE_CYCLES event is only available in the
Intel machines and it makes other vendors/archs fail on the test. As
libpfm4 can parse the generic events like cycles, let's use them.
Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test")
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/tests/expand-cgroup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index d5771e4d094f..4c59f3ae438f 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -145,7 +145,7 @@ static int expand_libpfm_events(void)
int ret;
struct evlist *evlist;
struct rblist metric_events;
- const char event_str[] = "UNHALTED_CORE_CYCLES";
+ const char event_str[] = "CYCLES";
struct option opt = {
.value = &evlist,
};
--
2.29.0.rc1.297.gfa9743e501-goog
To make the command line even more compact with cgroups, support regex
pattern matching in cgroup names.
$ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
3,000.73 msec cpu-clock foo # 2.998 CPUs utilized
12,530,992,699 cycles foo # 7.517 GHz (100.00%)
1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized
4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%)
1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized
4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%)
1.000892614 seconds time elapsed
Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/Documentation/perf-stat.txt | 5 +-
tools/perf/util/cgroup.c | 192 +++++++++++++++++++++----
2 files changed, 171 insertions(+), 26 deletions(-)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9f9f29025e49..2b44c08b3b23 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -168,8 +168,9 @@ command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
--for-each-cgroup name::
Expand event list for each cgroup in "name" (allow multiple cgroups separated
-by comma). This has same effect that repeating -e option and -G option for
-each event x name. This option cannot be used with -G/--cgroup option.
+by comma). It also supports regex patterns to match multiple groups. This has same
+effect that repeating -e option and -G option for each event x name. This option
+cannot be used with -G/--cgroup option.
-o file::
--output file::
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index b81324a13a2b..127c2411fb9f 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -13,9 +13,19 @@
#include <stdlib.h>
#include <string.h>
#include <api/fs/fs.h>
+#include <ftw.h>
+#include <regex.h>
int nr_cgroups;
+/* used to match cgroup name with patterns */
+struct cgroup_name {
+ struct list_head list;
+ bool used;
+ char name[];
+};
+static LIST_HEAD(cgroup_list);
+
static int open_cgroup(const char *name)
{
char path[PATH_MAX + 1];
@@ -149,6 +159,137 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup)
evsel__set_default_cgroup(evsel, cgroup);
}
+/* helper function for ftw() in match_cgroups and list_cgroups */
+static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused,
+ int typeflag)
+{
+ struct cgroup_name *cn;
+
+ if (typeflag != FTW_D)
+ return 0;
+
+ cn = malloc(sizeof(*cn) + strlen(fpath) + 1);
+ if (cn == NULL)
+ return -1;
+
+ cn->used = false;
+ strcpy(cn->name, fpath);
+
+ list_add_tail(&cn->list, &cgroup_list);
+ return 0;
+}
+
+static void release_cgroup_list(void)
+{
+ struct cgroup_name *cn;
+
+ while (!list_empty(&cgroup_list)) {
+ cn = list_first_entry(&cgroup_list, struct cgroup_name, list);
+ list_del(&cn->list);
+ free(cn);
+ }
+}
+
+/* collect given cgroups only */
+static int list_cgroups(const char *str)
+{
+ const char *p, *e, *eos = str + strlen(str);
+ struct cgroup_name *cn;
+ char *s;
+
+ /* use given name as is - for testing purpose */
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ if (e - str) {
+ int ret;
+
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+ /* pretend as if it was added by ftw() */
+ ret = add_cgroup_name(s, NULL, FTW_D);
+ free(s);
+ if (ret)
+ return -1;
+ } else {
+ if (add_cgroup_name("", NULL, FTW_D) < 0)
+ return -1;
+ }
+
+ if (!p)
+ break;
+ str = p+1;
+ }
+
+ /* these groups will be used */
+ list_for_each_entry(cn, &cgroup_list, list)
+ cn->used = true;
+
+ return 0;
+}
+
+/* collect all cgroups first and then match with the pattern */
+static int match_cgroups(const char *str)
+{
+ char mnt[PATH_MAX];
+ const char *p, *e, *eos = str + strlen(str);
+ struct cgroup_name *cn;
+ regex_t reg;
+ int prefix_len;
+ char *s;
+
+ if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event"))
+ return -1;
+
+ /* cgroup_name will have a full path, skip the root directory */
+ prefix_len = strlen(mnt);
+
+ /* collect all cgroups in the cgroup_list */
+ if (ftw(mnt, add_cgroup_name, 20) < 0)
+ return -1;
+
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ /* allow empty cgroups, i.e., skip */
+ if (e - str) {
+ /* termination added */
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+ if (regcomp(&reg, s, REG_NOSUB)) {
+ free(s);
+ return -1;
+ }
+
+ /* check cgroup name with the pattern */
+ list_for_each_entry(cn, &cgroup_list, list) {
+ char *name = cn->name + prefix_len;
+
+ if (name[0] == '/' && name[1])
+ name++;
+ if (!regexec(&reg, name, 0, NULL, 0))
+ cn->used = true;
+ }
+ regfree(&reg);
+ free(s);
+ } else {
+ /* the first entry is the root cgroup */
+ cn = list_first_entry(&cgroup_list, struct cgroup_name,
+ list);
+ cn->used = true;
+ }
+
+ if (!p)
+ break;
+ str = p+1;
+ }
+ return prefix_len;
+}
+
int parse_cgroups(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -201,6 +342,11 @@ int parse_cgroups(const struct option *opt, const char *str,
return 0;
}
+static bool has_pattern_string(const char *str)
+{
+ return !!strpbrk(str, "{}[]()|*+?^$");
+}
+
int evlist__expand_cgroup(struct evlist *evlist, const char *str,
struct rblist *metric_events, bool open_cgroup)
{
@@ -208,8 +354,9 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
struct evsel *pos, *evsel, *leader;
struct rblist orig_metric_events;
struct cgroup *cgrp = NULL;
- const char *p, *e, *eos = str + strlen(str);
+ struct cgroup_name *cn;
int ret = -1;
+ int prefix_len;
if (evlist->core.nr_entries == 0) {
fprintf(stderr, "must define events before cgroups\n");
@@ -234,24 +381,25 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
rblist__init(&orig_metric_events);
}
- for (;;) {
- p = strchr(str, ',');
- e = p ? p : eos;
+ if (has_pattern_string(str))
+ prefix_len = match_cgroups(str);
+ else
+ prefix_len = list_cgroups(str);
- /* allow empty cgroups, i.e., skip */
- if (e - str) {
- /* termination added */
- char *name = strndup(str, e - str);
- if (!name)
- goto out_err;
+ if (prefix_len < 0)
+ goto out_err;
- cgrp = cgroup__new(name, open_cgroup);
- free(name);
- if (cgrp == NULL)
- goto out_err;
- } else {
- cgrp = NULL;
- }
+ list_for_each_entry(cn, &cgroup_list, list) {
+ char *name;
+
+ if (!cn->used)
+ continue;
+
+ /* cgroup_name might have a full path, skip the prefix */
+ name = cn->name + prefix_len;
+ if (name[0] == '/' && name[1])
+ name++;
+ cgrp = cgroup__new(name, open_cgroup);
leader = NULL;
evlist__for_each_entry(orig_list, pos) {
@@ -277,23 +425,19 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
if (metricgroup__copy_metric_events(tmp_list, cgrp,
metric_events,
&orig_metric_events) < 0)
- break;
+ goto out_err;
}
perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries);
tmp_list->core.nr_entries = 0;
-
- if (!p) {
- ret = 0;
- break;
- }
- str = p+1;
}
+ ret = 0;
out_err:
evlist__delete(orig_list);
evlist__delete(tmp_list);
rblist__exit(&orig_metric_events);
+ release_cgroup_list();
return ret;
}
--
2.29.0.rc1.297.gfa9743e501-goog
On Sat, Oct 24, 2020 at 11:59:17AM +0900, Namhyung Kim wrote:
> I found that the UNHALTED_CORE_CYCLES event is only available in the
> Intel machines and it makes other vendors/archs fail on the test. As
> libpfm4 can parse the generic events like cycles, let's use them.
>
> Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test")
> Signed-off-by: Namhyung Kim <[email protected]>
So would the test still fail when libpfm is not compiled in?
-Andi
On Sat, Oct 24, 2020 at 11:59:18AM +0900, Namhyung Kim wrote:
> To make the command line even more compact with cgroups, support regex
> pattern matching in cgroup names.
>
> $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
>
> 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized
> 12,530,992,699 cycles foo # 7.517 GHz (100.00%)
> 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized
> 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%)
> 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized
> 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%)
just curious.. there was another foo/XXX group using the
rest of the cycles, right?
also perhaps we want to warn if there's no match found:
$ sudo ./perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
Performance counter stats for 'system wide':
1.002375575 seconds time elapsed
jirka
Hi Jiri,
On Mon, Oct 26, 2020 at 8:40 PM Jiri Olsa <[email protected]> wrote:
>
> On Sat, Oct 24, 2020 at 11:59:18AM +0900, Namhyung Kim wrote:
> > To make the command line even more compact with cgroups, support regex
> > pattern matching in cgroup names.
> >
> > $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
> >
> > 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized
> > 12,530,992,699 cycles foo # 7.517 GHz (100.00%)
> > 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized
> > 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%)
> > 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized
> > 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%)
>
> just curious.. there was another foo/XXX group using the
> rest of the cycles, right?
No, if so it should be displayed too. But actually there was a process
in the foo cgroup itself.
>
> also perhaps we want to warn if there's no match found:
>
> $ sudo ./perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
>
> Performance counter stats for 'system wide':
>
>
> 1.002375575 seconds time elapsed
>
Right, will check this case.
Thanks
Namhyung
On Mon, Oct 26, 2020 at 09:32:34PM +0900, Namhyung Kim wrote:
> Hi Jiri,
>
> On Mon, Oct 26, 2020 at 8:40 PM Jiri Olsa <[email protected]> wrote:
> >
> > On Sat, Oct 24, 2020 at 11:59:18AM +0900, Namhyung Kim wrote:
> > > To make the command line even more compact with cgroups, support regex
> > > pattern matching in cgroup names.
> > >
> > > $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
> > >
> > > 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized
> > > 12,530,992,699 cycles foo # 7.517 GHz (100.00%)
> > > 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized
> > > 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%)
> > > 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized
> > > 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%)
> >
> > just curious.. there was another foo/XXX group using the
> > rest of the cycles, right?
>
> No, if so it should be displayed too. But actually there was a process
> in the foo cgroup itself.
nah, right ;-) ok
jirka
>
> >
> > also perhaps we want to warn if there's no match found:
> >
> > $ sudo ./perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> >
> > 1.002375575 seconds time elapsed
> >
>
> Right, will check this case.
>
> Thanks
> Namhyung
>
On Fri, Oct 23, 2020 at 7:59 PM Namhyung Kim <[email protected]> wrote:
>
> I found that the UNHALTED_CORE_CYCLES event is only available in the
> Intel machines and it makes other vendors/archs fail on the test. As
> libpfm4 can parse the generic events like cycles, let's use them.
>
> Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test")
> Signed-off-by: Namhyung Kim <[email protected]>
Acked-by: Ian Rogers <[email protected]>
Thanks,
Ian
> ---
> tools/perf/tests/expand-cgroup.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
> index d5771e4d094f..4c59f3ae438f 100644
> --- a/tools/perf/tests/expand-cgroup.c
> +++ b/tools/perf/tests/expand-cgroup.c
> @@ -145,7 +145,7 @@ static int expand_libpfm_events(void)
> int ret;
> struct evlist *evlist;
> struct rblist metric_events;
> - const char event_str[] = "UNHALTED_CORE_CYCLES";
> + const char event_str[] = "CYCLES";
> struct option opt = {
> .value = &evlist,
> };
> --
> 2.29.0.rc1.297.gfa9743e501-goog
>
Em Mon, Oct 26, 2020 at 09:32:34PM +0900, Namhyung Kim escreveu:
> Hi Jiri,
>
> On Mon, Oct 26, 2020 at 8:40 PM Jiri Olsa <[email protected]> wrote:
> >
> > On Sat, Oct 24, 2020 at 11:59:18AM +0900, Namhyung Kim wrote:
> > > To make the command line even more compact with cgroups, support regex
> > > pattern matching in cgroup names.
> > >
> > > $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
> > >
> > > 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized
> > > 12,530,992,699 cycles foo # 7.517 GHz (100.00%)
> > > 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized
> > > 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%)
> > > 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized
> > > 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%)
> >
> > just curious.. there was another foo/XXX group using the
> > rest of the cycles, right?
>
> No, if so it should be displayed too. But actually there was a process
> in the foo cgroup itself.
>
> >
> > also perhaps we want to warn if there's no match found:
> >
> > $ sudo ./perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> >
> > 1.002375575 seconds time elapsed
> >
>
> Right, will check this case.
Hum, I thought that could be done on top of this one, but then, the
ambiguity of:
1. No samples for a cgroups matching that expression
2. No cgroups match that expression
Is real and warrants a warning for the 'no cgroups match the
--for-each-group regexp' case.
So I'll wait for v3 with that warning,
Thanks,
- Arnaldo
Hi Arnaldo,
On Tue, Oct 27, 2020 at 2:53 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Em Mon, Oct 26, 2020 at 09:32:34PM +0900, Namhyung Kim escreveu:
> > Hi Jiri,
> >
> > On Mon, Oct 26, 2020 at 8:40 PM Jiri Olsa <[email protected]> wrote:
> > > also perhaps we want to warn if there's no match found:
> > >
> > > $ sudo ./perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1
> > >
> > > Performance counter stats for 'system wide':
> > >
> > >
> > > 1.002375575 seconds time elapsed
> > >
> >
> > Right, will check this case.
>
> Hum, I thought that could be done on top of this one, but then, the
> ambiguity of:
>
> 1. No samples for a cgroups matching that expression
>
> 2. No cgroups match that expression
>
> Is real and warrants a warning for the 'no cgroups match the
> --for-each-group regexp' case.
Item 1 will be handled by perf stat showing <not counted>, and
I'll add a warning for item 2 like below:
# perf stat -a -e cycles --for-each-cgroup ^foo sleep 1
no cgroup matched: ^foo
Usage: perf stat [<options>] [<command>]
--for-each-cgroup <name>
expand events for each cgroup
Thanks
Namhyung