2024-04-12 21:08:19

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 00/16] Perf stat metric grouping with hardware information

From: Weilin Wang <[email protected]>

Changes in v5:
- Update code about "TakenAlone" to use MSR value instead. Currently, MSR value
is used in string format. We are planning to improve this part (maybe use bitmap)
in future. [Ian]
- Update variable/JSON field names and comments for better readability [Ian]
- Update metricgroup__build_event_string to also use the get_tool_event_str helper
function [Ian]

v4: https://lore.kernel.org/all/[email protected]/


Weilin Wang (16):
perf stat: Add new field in stat_config to enable hardware aware
grouping.
perf stat: Add basic functions for the hardware aware grouping
perf pmu-events: Add functions in jevent.py to parse counter and event
info for hardware aware grouping
find_bit: add _find_last_and_bit() to support finding the most
significant set bit
perf stat: Add functions to set counter bitmaps for hardware-grouping
method
perf stat: Add functions to get counter info
perf stat: Add functions to create new group and assign events into
groups
perf stat: Add build string function and topdown events handling in
hardware-grouping
perf stat: Add function to handle special events in hardware-grouping
perf stat: Add function to combine metrics for hardware-grouping
perf stat: Add partial support on MSR in hardware-grouping
perf stat: Handle NMI in hardware-grouping
perf stat: Code refactoring in hardware-grouping
perf stat: Add tool events support in hardware-grouping
perf stat: use tool event helper function in
metricgroup__build_event_string
perf stat: Add hardware-grouping cmd option to perf stat

tools/include/linux/find.h | 18 +
tools/lib/find_bit.c | 33 +
tools/perf/builtin-stat.c | 7 +
tools/perf/pmu-events/jevents.py | 208 ++++-
tools/perf/pmu-events/pmu-events.h | 38 +-
tools/perf/util/metricgroup.c | 1393 ++++++++++++++++++++++++----
tools/perf/util/metricgroup.h | 1 +
tools/perf/util/stat.h | 1 +
8 files changed, 1525 insertions(+), 174 deletions(-)

--
2.42.0



2024-04-12 21:08:32

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 01/16] perf stat: Add new field in stat_config to enable hardware aware grouping.

From: Weilin Wang <[email protected]>

Hardware counter and event information could be used to help create event
groups that better utilize hardware counters and improve multiplexing.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/builtin-stat.c | 5 +++++
tools/perf/util/metricgroup.c | 5 +++++
tools/perf/util/metricgroup.h | 1 +
tools/perf/util/stat.h | 1 +
4 files changed, 12 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6bba1a89d030..c4a5f0984295 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2106,6 +2106,7 @@ static int add_default_attributes(void)
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events);
}

@@ -2139,6 +2140,7 @@ static int add_default_attributes(void)
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events);
}

@@ -2173,6 +2175,7 @@ static int add_default_attributes(void)
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events) < 0)
return -1;
}
@@ -2214,6 +2217,7 @@ static int add_default_attributes(void)
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events) < 0)
return -1;

@@ -2748,6 +2752,7 @@ int cmd_stat(int argc, const char **argv)
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events);

zfree(&metrics);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 79ef6095ab28..11613450725a 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1690,12 +1690,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
+ bool hardware_aware_grouping,
struct rblist *metric_events)
{
const struct pmu_metrics_table *table = pmu_metrics_table__find();

if (!table)
return -EINVAL;
+ if (hardware_aware_grouping) {
+ pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n");
+ }
+

return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index d5325c6ec8e1..779f6ede1b51 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -77,6 +77,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
+ bool hardware_aware_grouping,
struct rblist *metric_events);
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_metrics_table *table,
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index d6e5c8787ba2..fd7a187551bd 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -87,6 +87,7 @@ struct perf_stat_config {
bool metric_no_group;
bool metric_no_merge;
bool metric_no_threshold;
+ bool hardware_aware_grouping;
bool stop_read_counter;
bool iostat_run;
char *user_requested_cpu_list;
--
2.42.0


2024-04-12 21:08:56

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 04/16] find_bit: add _find_last_and_bit() to support finding the most significant set bit

From: Weilin Wang <[email protected]>

This function is required for more efficient PMU counter assignment.

When we use bitmap to log available PMU counters and counters that support a
given event, we want to find a most significant set bit so that we could
starting assigning counters with larger index first. This is helpful
because counters with smaller indexes usually are more generic and
support more events.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/include/linux/find.h | 18 ++++++++++++++++++
tools/lib/find_bit.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)

diff --git a/tools/include/linux/find.h b/tools/include/linux/find.h
index 38c0a542b0e2..fce336ec2b96 100644
--- a/tools/include/linux/find.h
+++ b/tools/include/linux/find.h
@@ -18,6 +18,8 @@ extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long si
extern unsigned long _find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size);

#ifndef find_next_bit
/**
@@ -174,4 +176,20 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
}
#endif

+#ifndef find_last_and_bit
+static inline
+unsigned long find_last_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+ return val ? __fls(val) : size;
+ }
+
+ return _find_last_and_bit(addr1, addr2, size);
+}
+#endif
+
#endif /*__LINUX_FIND_H_ */
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 6a3dc167d30e..a84817d80c46 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -67,6 +67,27 @@ out: \
sz; \
})

+/*
+ * Common helper for find_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
+ */
+#define FIND_LAST_BIT(FETCH, MUNGE, size) \
+({ \
+ unsigned long idx, val, sz = (size); \
+ \
+ for (idx = ((size - 1) / BITS_PER_LONG); idx >= 0; idx--) { \
+ val = (FETCH); \
+ if (val) { \
+ sz = min(idx * BITS_PER_LONG + __fls(MUNGE(val)), sz); \
+ break; \
+ } \
+ } \
+ \
+ sz; \
+})
+
#ifndef find_first_bit
/*
* Find the first set bit in a memory region.
@@ -121,3 +142,15 @@ unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits
return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
}
#endif
+
+#ifndef find_last_and_bit
+/*
+ * Find the last set bit in two memory regions.
+ */
+unsigned long _find_last_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2,
+ unsigned long size)
+{
+ return FIND_LAST_BIT(addr1[idx] & addr2[idx], /* nop */, size);
+}
+#endif
--
2.42.0


2024-04-12 21:09:17

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 05/16] perf stat: Add functions to set counter bitmaps for hardware-grouping method

From: Weilin Wang <[email protected]>

Add metricgroup__event_info data structure to represent an event in the
metric grouping context; the list of counters and the PMU name an event
should be collected with.

Add functions to parse event counter info from pmu-events and generate a
list of metricgroup__event_info data to prepare grouping.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 219 +++++++++++++++++++++++++++++++++-
1 file changed, 216 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 8047f03b2b1f..1a7ac17f7ae1 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -24,6 +24,7 @@
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <linux/bitmap.h>
#include <perf/cpumap.h>
#include <subcmd/parse-options.h>
#include <api/fs/fs.h>
@@ -159,6 +160,29 @@ struct metric {
struct evlist *evlist;
};

+/* Maximum number of counters per PMU*/
+#define NR_COUNTERS 16
+
+/**
+ * An event used in a metric. This info is for metric grouping.
+ */
+struct metricgroup__event_info {
+ struct list_head nd;
+ /** The name of the event. */
+ const char *name;
+ /** The name of the pmu the event be collected on. */
+ const char *pmu_name;
+ /** The event uses fixed counter. */
+ bool fixed_counter;
+ /**
+ * The event uses special counters that we consider that as free counter
+ * during the event grouping.
+ */
+ bool free_counter;
+ /** The counters the event allowed to be collected on. */
+ DECLARE_BITMAP(counters, NR_COUNTERS);
+};
+
/**
* Each group is one node in the group string list.
*/
@@ -1440,6 +1464,181 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
return ret;
}

+/**
+ * set_counter_bitmap - The counter bitmap: [0-15].
+ */
+static int set_counter_bitmap(int pos, unsigned long *bitmap)
+{
+ if (pos >= NR_COUNTERS || pos < 0)
+ return -EINVAL;
+ __set_bit(pos, bitmap);
+ return 0;
+}
+
+static int parse_fixed_counter(const char *counter,
+ unsigned long *bitmap,
+ bool *fixed)
+{
+ int ret = -ENOENT;
+ /* TODO: this pattern maybe different on some other platforms */
+ const char *pattern = "Fixed counter ";
+ int pos = 0;
+
+ if (!strncmp(counter, pattern, strlen(pattern))) {
+ pos = atoi(counter + strlen(pattern));
+ ret = set_counter_bitmap(pos, bitmap);
+ if (ret)
+ return ret;
+ *fixed = true;
+ return 0;
+ }
+ return ret;
+}
+
+/**
+ * parse_counter - Parse event counter info from pmu-events and set up bitmap
+ * accordingly.
+ *
+ * @counter: counter info string to be parsed.
+ * @bitmap: bitmap to set based on counter info parsed.
+ * @fixed: is set to true if the event uses fixed counter.
+ */
+static int parse_counter(const char *counter,
+ unsigned long *bitmap,
+ bool *fixed)
+{
+ int ret = 0;
+ char *p;
+ char *tok;
+ int pos = 0;
+
+ ret = parse_fixed_counter(counter, bitmap, fixed);
+ /* ret==0 means matched with fixed counter */
+ if (ret == 0)
+ return ret;
+
+ p = strdup(counter);
+ if (!p)
+ return -ENOMEM;
+ tok = strtok(p, ",");
+ if (!tok)
+ return -ENOENT;
+
+ while (tok) {
+ pos = atoi(tok);
+ ret = set_counter_bitmap(pos, bitmap);
+ if (ret)
+ return ret;
+ tok = strtok(NULL, ",");
+ }
+ return 0;
+}
+
+static struct metricgroup__event_info *event_info__new(const char *name,
+ const char *pmu_name,
+ const char *counter,
+ bool free_counter)
+{
+ int ret = 0;
+ char *bit_buf = malloc(NR_COUNTERS);
+ bool fixed_counter = false;
+ struct metricgroup__event_info *e;
+
+ e = zalloc(sizeof(*e));
+ if (!e)
+ return NULL;
+ if (!pmu_name)
+ pmu_name = "core";
+
+ e->name = strdup(name);
+ e->pmu_name = strdup(pmu_name);
+ if (!e->pmu_name || !e->name)
+ return NULL;
+ e->free_counter = free_counter;
+ if (free_counter) {
+ ret = set_counter_bitmap(0, e->counters);
+ if (ret)
+ return NULL;
+ } else {
+ ret = parse_counter(counter, e->counters, &fixed_counter);
+ if (ret)
+ return NULL;
+ e->fixed_counter = fixed_counter;
+ }
+
+ bitmap_scnprintf(e->counters, NR_COUNTERS, bit_buf, NR_COUNTERS);
+ pr_debug("Event %s requires pmu %s counter: %s bitmap %s, [pmu=%s]\n",
+ e->name, e->pmu_name, counter, bit_buf, pmu_name);
+
+ return e;
+}
+
+struct metricgroup__add_metric_event_data {
+ struct list_head *list;
+ /* pure event name, exclude umask and other info*/
+ const char *event_name;
+ /* event name and umask if applicable*/
+ const char *event_id;
+};
+
+static int metricgroup__add_metric_event_callback(const struct pmu_event *pe,
+ const struct pmu_events_table *table __maybe_unused,
+ void *data)
+{
+ struct metricgroup__event_info *event;
+ struct metricgroup__add_metric_event_data *d = data;
+
+ if (!strcasecmp(pe->name, d->event_name)) {
+ event = event_info__new(d->event_id, pe->pmu, pe->counter, /*free_counter=*/false);
+ if (!event)
+ return -ENOMEM;
+ list_add(&event->nd, d->list);
+ }
+
+ return 0;
+}
+
+/**
+ * get_metricgroup_events - Find counter requirement of events from the
+ * pmu_events table
+ * @full_id: the full event identifiers.
+ * @table: pmu_events table that is searched for event data.
+ * @event_info_list: the list that the new event counter info added to.
+ */
+static int get_metricgroup_events(const char *full_id,
+ const struct pmu_events_table *table,
+ struct list_head *event_info_list)
+{
+ LIST_HEAD(list);
+ int ret = 0;
+ const char *id;
+ const char *rsep, *sep = strchr(full_id, '@');
+
+ if (sep) {
+ rsep = strchr(full_id, ',');
+ id = strndup(sep + 1, rsep - sep - 1);
+ if (!id) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ id = full_id;
+ }
+ {
+ struct metricgroup__add_metric_event_data data = {
+ .list = &list,
+ .event_name = id,
+ .event_id = full_id,
+ };
+ ret = pmu_events_table_for_each_event(table,
+ metricgroup__add_metric_event_callback, &data);
+ }
+
+out:
+ list_splice(&list, event_info_list);
+ return ret;
+}
+
/**
* hw_aware_build_grouping - Build event groupings by reading counter
* requirement of the events and counter available on the system from
@@ -1453,9 +1652,25 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
const char *modifier __maybe_unused)
{
int ret = 0;
+ struct hashmap_entry *cur;
+ LIST_HEAD(pmu_info_list);
+ LIST_HEAD(event_info_list);
+ size_t bkt;
+ const struct pmu_events_table *etable = perf_pmu__find_events_table(NULL);
+
+#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
+ hashmap__for_each_entry(ctx->ids, cur, bkt) {
+ const char *id = cur->pkey;
+
+ pr_debug("found event %s\n", id);
+
+ ret = get_metricgroup_events(id, etable, &event_info_list);
+ if (ret)
+ return ret;
+ }

- pr_debug("This is a placeholder\n");
return ret;
+#undef RETURN_IF_NON_ZERO
}

static void group_str_free(struct metricgroup__group_strs *g)
@@ -1529,8 +1744,6 @@ static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
*out_evlist = parsed_evlist;
parsed_evlist = NULL;
err_out:
- parse_events_error__exit(&parse_error);
- evlist__delete(parsed_evlist);
metricgroup__free_grouping_strs(&groupings);
return ret;
}
--
2.42.0


2024-04-12 21:09:20

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 03/16] perf pmu-events: Add functions in jevent.py to parse counter and event info for hardware aware grouping

From: Weilin Wang <[email protected]>

These functions are added to parse event counter restrictions and counter
availability info from json files so that the metric grouping method could
do grouping based on the counter restriction of events and the counters
that are available on the system.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/pmu-events/jevents.py | 204 +++++++++++++++++++++++++++--
tools/perf/pmu-events/pmu-events.h | 32 ++++-
2 files changed, 224 insertions(+), 12 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index e42efc16723e..7cfd86d77fea 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -23,6 +23,8 @@ _metric_tables = []
_sys_metric_tables = []
# Mapping between sys event table names and sys metric table names.
_sys_event_table_to_metric_table_mapping = {}
+# List of regular PMU counter layout tables.
+_pmu_layouts_tables = []
# Map from an event name to an architecture standard
# JsonEvent. Architecture standard events are in json files in the top
# f'{_args.starting_dir}/{_args.arch}' directory.
@@ -31,6 +33,10 @@ _arch_std_events = {}
_pending_events = []
# Name of events table to be written out
_pending_events_tblname = None
+# PMU counter layout to write out when the layout table is closed
+_pending_pmu_counts = []
+# Name of PMU counter layout table to be written out
+_pending_pmu_counts_tblname = None
# Metrics to write out when the table is closed
_pending_metrics = []
# Name of metrics table to be written out
@@ -51,6 +57,11 @@ _json_event_attributes = [
'long_desc'
]

+# Attributes that are in pmu_unit_layout.
+_json_layout_attributes = [
+ 'pmu', 'desc'
+]
+
# Attributes that are in pmu_metric rather than pmu_event.
_json_metric_attributes = [
'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
@@ -265,7 +276,7 @@ class JsonEvent:

def unit_to_pmu(unit: str) -> Optional[str]:
"""Convert a JSON Unit to Linux PMU name."""
- if not unit:
+ if not unit or unit == "core":
return 'default_core'
# Comment brought over from jevents.c:
# it's not realistic to keep adding these, we need something more scalable ...
@@ -334,6 +345,19 @@ class JsonEvent:
if 'Errata' in jd:
extra_desc += ' Spec update: ' + jd['Errata']
self.pmu = unit_to_pmu(jd.get('Unit'))
+ # The list of counter(s) the event could be collected with
+ class Counter:
+ gp = str()
+ fixed = str()
+ self.counters = {'list': str(), 'num': Counter()}
+ self.counters['list'] = jd.get('Counter')
+ # Number of generic counter
+ self.counters['num'].gp = jd.get('CountersNumGeneric')
+ # Number of fixed counter
+ self.counters['num'].fixed = jd.get('CountersNumFixed')
+ # If the event uses an MSR, other event uses the same MSR could not be
+ # schedule to collect at the same time.
+ self.msr = jd.get('MSRIndex')
filter = jd.get('Filter')
self.unit = jd.get('ScaleUnit')
self.perpkg = jd.get('PerPkg')
@@ -409,8 +433,20 @@ class JsonEvent:
s += f'\t{attr} = {value},\n'
return s + '}'

- def build_c_string(self, metric: bool) -> str:
+ def build_c_string(self, metric: bool, layout: bool) -> str:
s = ''
+ if layout:
+ for attr in _json_layout_attributes:
+ x = getattr(self, attr)
+ if attr in _json_enum_attributes:
+ s += x if x else '0'
+ else:
+ s += f'{x}\\000' if x else '\\000'
+ x = self.counters['num'].gp
+ s += x if x else '0'
+ x = self.counters['num'].fixed
+ s += x if x else '0'
+ return s
for attr in _json_metric_attributes if metric else _json_event_attributes:
x = getattr(self, attr)
if metric and x and attr == 'metric_expr':
@@ -423,12 +459,15 @@ class JsonEvent:
s += x if x else '0'
else:
s += f'{x}\\000' if x else '\\000'
+ if not metric:
+ x = self.counters['list']
+ s += f'{x}\\000' if x else '\\000'
return s

- def to_c_string(self, metric: bool) -> str:
+ def to_c_string(self, metric: bool, layout: bool) -> str:
"""Representation of the event as a C struct initializer."""

- s = self.build_c_string(metric)
+ s = self.build_c_string(metric, layout)
return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'


@@ -465,6 +504,8 @@ def preprocess_arch_std_files(archpath: str) -> None:
_arch_std_events[event.name.lower()] = event
if event.metric_name:
_arch_std_events[event.metric_name.lower()] = event
+ if event.counters['num'].gp:
+ _arch_std_events[event.pmu.lower()] = event


def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
@@ -474,6 +515,8 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
_pending_events.append(e)
if e.metric_name:
_pending_metrics.append(e)
+ if e.counters['num'].gp:
+ _pending_pmu_counts.append(e)


def print_pending_events() -> None:
@@ -514,7 +557,7 @@ def print_pending_events() -> None:
last_pmu = event.pmu
pmus.add((event.pmu, pmu_name))

- _args.output_file.write(event.to_c_string(metric=False))
+ _args.output_file.write(event.to_c_string(metric=False, layout=False))
_pending_events = []

_args.output_file.write(f"""
@@ -569,7 +612,7 @@ def print_pending_metrics() -> None:
last_pmu = metric.pmu
pmus.add((metric.pmu, pmu_name))

- _args.output_file.write(metric.to_c_string(metric=True))
+ _args.output_file.write(metric.to_c_string(metric=True, layout=False))
_pending_metrics = []

_args.output_file.write(f"""
@@ -587,6 +630,35 @@ const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
""")
_args.output_file.write('};\n\n')

+def print_pending_pmu_counter_layout_table() -> None:
+ '''Print counter layout data from counter.json file to counter layout table in
+ c-string'''
+
+ def pmu_counts_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
+ def fix_none(s: Optional[str]) -> str:
+ if s is None:
+ return ''
+ return s
+
+ return (j.desc is not None, fix_none(j.pmu))
+
+ global _pending_pmu_counts
+ if not _pending_pmu_counts:
+ return
+
+ global _pending_pmu_counts_tblname
+ global pmu_layouts_tables
+ _pmu_layouts_tables.append(_pending_pmu_counts_tblname)
+
+ _args.output_file.write(
+ f'static const struct compact_pmu_event {_pending_pmu_counts_tblname}[] = {{\n')
+
+ for pmu_layout in sorted(_pending_pmu_counts, key=pmu_counts_cmp_key):
+ _args.output_file.write(pmu_layout.to_c_string(metric=False, layout=True))
+ _pending_pmu_counts = []
+
+ _args.output_file.write('};\n\n')
+
def get_topic(topic: str) -> str:
if topic.endswith('metrics.json'):
return 'metrics'
@@ -623,10 +695,12 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
pmu_name = f"{event.pmu}\\000"
if event.name:
_bcs.add(pmu_name, metric=False)
- _bcs.add(event.build_c_string(metric=False), metric=False)
+ _bcs.add(event.build_c_string(metric=False, layout=False), metric=False)
if event.metric_name:
_bcs.add(pmu_name, metric=True)
- _bcs.add(event.build_c_string(metric=True), metric=True)
+ _bcs.add(event.build_c_string(metric=True, layout=False), metric=True)
+ if event.counters['num'].gp:
+ _bcs.add(event.build_c_string(metric=False, layout=True), metric=False)

def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
"""Process a JSON file during the main walk."""
@@ -640,11 +714,14 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
if item.is_dir() and is_leaf_dir(item.path):
print_pending_events()
print_pending_metrics()
+ print_pending_pmu_counter_layout_table()

global _pending_events_tblname
_pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
global _pending_metrics_tblname
_pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)
+ global _pending_pmu_counts_tblname
+ _pending_pmu_counts_tblname = file_name_to_table_name('pmu_layouts_', parents, item.name)

if item.name == 'sys':
_sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
@@ -678,6 +755,12 @@ struct pmu_metrics_table {
uint32_t num_pmus;
};

+/* Struct used to make the PMU counter layout table implementation opaque to callers. */
+struct pmu_layouts_table {
+ const struct compact_pmu_event *entries;
+ size_t length;
+};
+
/*
* Map a CPU to its table of PMU events. The CPU is identified by the
* cpuid field, which is an arch-specific identifier for the CPU.
@@ -691,6 +774,7 @@ struct pmu_events_map {
const char *cpuid;
struct pmu_events_table event_table;
struct pmu_metrics_table metric_table;
+ struct pmu_layouts_table layout_table;
};

/*
@@ -735,6 +819,12 @@ const struct pmu_events_map pmu_events_map[] = {
metric_size = '0'
if event_size == '0' and metric_size == '0':
continue
+ layout_tblname = file_name_to_table_name('pmu_layouts_', [], row[2].replace('/', '_'))
+ if layout_tblname in _pmu_layouts_tables:
+ layout_size = f'ARRAY_SIZE({layout_tblname})'
+ else:
+ layout_tblname = 'NULL'
+ layout_size = '0'
cpuid = row[0].replace('\\', '\\\\')
_args.output_file.write(f"""{{
\t.arch = "{arch}",
@@ -746,6 +836,10 @@ const struct pmu_events_map pmu_events_map[] = {
\t.metric_table = {{
\t\t.pmus = {metric_tblname},
\t\t.num_pmus = {metric_size}
+\t}},
+\t.layout_table = {{
+\t\t.entries = {layout_tblname},
+\t\t.length = {layout_size}
\t}}
}},
""")
@@ -756,6 +850,7 @@ const struct pmu_events_map pmu_events_map[] = {
\t.cpuid = 0,
\t.event_table = { 0, 0 },
\t.metric_table = { 0, 0 },
+\t.layout_table = { 0, 0 },
}
};
""")
@@ -824,6 +919,9 @@ static void decompress_event(int offset, struct pmu_event *pe)
_args.output_file.write('\tp++;')
else:
_args.output_file.write('\twhile (*p++);')
+ _args.output_file.write('\twhile (*p++);')
+ _args.output_file.write(f'\n\tpe->counters_list = ')
+ _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
_args.output_file.write("""}

static void decompress_metric(int offset, struct pmu_metric *pm)
@@ -844,6 +942,30 @@ static void decompress_metric(int offset, struct pmu_metric *pm)
_args.output_file.write('\twhile (*p++);')
_args.output_file.write("""}

+static void decompress_layout(int offset, struct pmu_layout *pm)
+{
+\tconst char *p = &big_c_string[offset];
+""")
+ for attr in _json_layout_attributes:
+ _args.output_file.write(f'\n\tpm->{attr} = ')
+ if attr in _json_enum_attributes:
+ _args.output_file.write("*p - '0';\n")
+ else:
+ _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
+ if attr == _json_layout_attributes[-1]:
+ continue
+ if attr in _json_enum_attributes:
+ _args.output_file.write('\tp++;')
+ else:
+ _args.output_file.write('\twhile (*p++);')
+ _args.output_file.write('\tp++;')
+ _args.output_file.write(f'\n\tpm->counters_num_gp = ')
+ _args.output_file.write("*p - '0';\n")
+ _args.output_file.write('\tp++;')
+ _args.output_file.write(f'\n\tpm->counters_num_fixed = ')
+ _args.output_file.write("*p - '0';\n")
+ _args.output_file.write("""}
+
static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
const struct pmu_table_entry *pmu,
pmu_event_iter_fn fn,
@@ -999,6 +1121,21 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
return 0;
}

+int pmu_layouts_table__for_each_layout(const struct pmu_layouts_table *table,
+ pmu_layout_iter_fn fn,
+ void *data) {
+ for (size_t i = 0; i < table->length; i++) {
+ struct pmu_layout pm;
+ int ret;
+
+ decompress_layout(table->entries[i].offset, &pm);
+ ret = fn(&pm, data);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
{
static struct {
@@ -1094,6 +1231,33 @@ const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pm
return NULL;
}

+const struct pmu_layouts_table *perf_pmu__find_layouts_table(struct perf_pmu *pmu)
+{
+ const struct pmu_layouts_table *table = NULL;
+ char *cpuid = perf_pmu__getcpuid(pmu);
+ int i;
+
+ /* on some platforms which uses cpus map, cpuid can be NULL for
+ * PMUs other than CORE PMUs.
+ */
+ if (!cpuid)
+ return NULL;
+
+ i = 0;
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+ if (!map->arch)
+ break;
+
+ if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
+ table = &map->layout_table;
+ break;
+ }
+ }
+ free(cpuid);
+ return table;
+}
+
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
for (const struct pmu_events_map *tables = &pmu_events_map[0];
@@ -1115,6 +1279,16 @@ const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const
}
return NULL;
}
+const struct pmu_layouts_table *find_core_layouts_table(const char *arch, const char *cpuid)
+{
+ for (const struct pmu_events_map *tables = &pmu_events_map[0];
+ tables->arch;
+ tables++) {
+ if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
+ return &tables->layout_table;
+ }
+ return NULL;
+}

int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
{
@@ -1143,6 +1317,19 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
return 0;
}

+int pmu_for_each_core_layout(pmu_layout_iter_fn fn, void *data)
+{
+ for (const struct pmu_events_map *tables = &pmu_events_map[0];
+ tables->arch;
+ tables++) {
+ int ret = pmu_layouts_table__for_each_layout(&tables->layout_table, fn, data);
+
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
const struct pmu_events_table *find_sys_events_table(const char *name)
{
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
@@ -1299,6 +1486,7 @@ struct pmu_table_entry {
ftw(arch_path, [], process_one_file)
print_pending_events()
print_pending_metrics()
+ print_pending_pmu_counter_layout_table()

print_mapping_table(archs)
print_system_mapping_table()
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index f5aa96f1685c..5b42a18693cf 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -45,6 +45,11 @@ struct pmu_event {
const char *desc;
const char *topic;
const char *long_desc;
+ /**
+ * The list of counter(s) the event could be collected on.
+ * eg., "0,1,2,3,4,5,6,7".
+ */
+ const char *counters_list;
const char *pmu;
const char *unit;
bool perpkg;
@@ -67,8 +72,18 @@ struct pmu_metric {
enum metric_event_groups event_grouping;
};

+struct pmu_layout {
+ const char *pmu;
+ const char *desc;
+ /** Total number of generic counters*/
+ int counters_num_gp;
+ /** Total number of fixed counters. Set to zero if no fixed counter on the unit.*/
+ int counters_num_fixed;
+};
+
struct pmu_events_table;
struct pmu_metrics_table;
+struct pmu_layouts_table;

typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe,
const struct pmu_events_table *table,
@@ -78,15 +93,21 @@ typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm,
const struct pmu_metrics_table *table,
void *data);

+typedef int (*pmu_layout_iter_fn)(const struct pmu_layout *pm,
+ void *data);
+
int pmu_events_table__for_each_event(const struct pmu_events_table *table,
struct perf_pmu *pmu,
pmu_event_iter_fn fn,
void *data);
int pmu_events_table__find_event(const struct pmu_events_table *table,
- struct perf_pmu *pmu,
- const char *name,
- pmu_event_iter_fn fn,
- void *data);
+ struct perf_pmu *pmu,
+ const char *name,
+ pmu_event_iter_fn fn,
+ void *data);
+int pmu_layouts_table__for_each_layout(const struct pmu_layouts_table *table,
+ pmu_layout_iter_fn fn,
+ void *data);
size_t pmu_events_table__num_events(const struct pmu_events_table *table,
struct perf_pmu *pmu);

@@ -95,10 +116,13 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pm

const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu);
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu);
+const struct pmu_layouts_table *perf_pmu__find_layouts_table(struct perf_pmu *pmu);
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid);
const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid);
+const struct pmu_layouts_table *find_core_layouts_table(const char *arch, const char *cpuid);
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data);
int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data);
+int pmu_for_each_core_layout(pmu_layout_iter_fn fn, void *data);

const struct pmu_events_table *find_sys_events_table(const char *name);
const struct pmu_metrics_table *find_sys_metrics_table(const char *name);
--
2.42.0


2024-04-12 21:09:35

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 06/16] perf stat: Add functions to get counter info

From: Weilin Wang <[email protected]>

Add data structure metricgroup__pmu_counters to represent hardware counters
available in the system.

Add functions to parse pmu-events and create the list of pmu_info_list to
hold the counter information of the system.

Add functions to free pmu_info_list and event_info_list before exiting
the grouping process of the hardware-grouping method.

This method would fall back to normal grouping when event json files do not
support hardware aware grouping.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 115 +++++++++++++++++++++++++++++++++-
1 file changed, 112 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 1a7ac17f7ae1..78a5410cdc09 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -183,6 +183,21 @@ struct metricgroup__event_info {
DECLARE_BITMAP(counters, NR_COUNTERS);
};

+/**
+ * Each node describes the counter availability of one pmu.
+ * This info is built up from the JSON files at the beginning and
+ * used as a reference during the metric grouping process.
+ */
+struct metricgroup__pmu_counters {
+ struct list_head nd;
+ /** The name of the pmu the event collected on. */
+ const char *name;
+ /** The number of gp counters in the pmu. */
+ size_t counters_num_gp;
+ /** The number of fixed counters in the pmu. */
+ size_t counters_num_fixed;
+};
+
/**
* Each group is one node in the group string list.
*/
@@ -1534,6 +1549,40 @@ static int parse_counter(const char *counter,
return 0;
}

+static void metricgroup__event_info__delete(struct metricgroup__event_info *e)
+{
+ zfree(&e->name);
+ zfree(&e->pmu_name);
+ free(e);
+}
+
+static void metricgroup__pmu_counter__delete(struct metricgroup__pmu_counters *p)
+{
+ zfree(&p->name);
+ free(p);
+}
+
+static void metricgroup__free_event_info(struct list_head
+ *event_info_list)
+{
+ struct metricgroup__event_info *e, *tmp;
+
+ list_for_each_entry_safe(e, tmp, event_info_list, nd) {
+ list_del_init(&e->nd);
+ metricgroup__event_info__delete(e);
+ }
+}
+
+static void metricgroup__free_pmu_info(struct list_head *pmu_info_list)
+{
+ struct metricgroup__pmu_counters *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, pmu_info_list, nd) {
+ list_del_init(&p->nd);
+ metricgroup__pmu_counter__delete(p);
+ }
+}
+
static struct metricgroup__event_info *event_info__new(const char *name,
const char *pmu_name,
const char *counter,
@@ -1589,7 +1638,9 @@ static int metricgroup__add_metric_event_callback(const struct pmu_event *pe,
struct metricgroup__add_metric_event_data *d = data;

if (!strcasecmp(pe->name, d->event_name)) {
- event = event_info__new(d->event_id, pe->pmu, pe->counter, /*free_counter=*/false);
+ if (!pe->counters_list)
+ return -EINVAL;
+ event = event_info__new(d->event_id, pe->pmu, pe->counters_list, /*free_counter=*/false);
if (!event)
return -ENOMEM;
list_add(&event->nd, d->list);
@@ -1630,7 +1681,7 @@ static int get_metricgroup_events(const char *full_id,
.event_name = id,
.event_id = full_id,
};
- ret = pmu_events_table_for_each_event(table,
+ ret = pmu_events_table__for_each_event(table, /*pmu=*/NULL,
metricgroup__add_metric_event_callback, &data);
}

@@ -1639,6 +1690,59 @@ static int get_metricgroup_events(const char *full_id,
return ret;
}

+static struct metricgroup__pmu_counters *pmu_layout__new(const struct pmu_layout *pl)
+{
+ struct metricgroup__pmu_counters *l;
+
+ l = zalloc(sizeof(*l));
+
+ if (!l)
+ return NULL;
+
+ l->name = strdup(pl->pmu);
+ if (!l->name)
+ return NULL;
+ l->counters_num_gp = pl->counters_num_gp;
+ l->counters_num_fixed = pl->counters_num_fixed;
+ pr_debug("create new pmu_layout: [pmu]=%s, [gp_size]=%ld, [fixed_size]=%ld\n",
+ l->name, l->counters_num_gp, l->counters_num_fixed);
+ return l;
+}
+
+static int metricgroup__add_pmu_layout_callback(const struct pmu_layout *pl,
+ void *data)
+{
+ struct metricgroup__pmu_counters *pmu;
+ struct list_head *d = data;
+ int ret = 0;
+
+ pmu = pmu_layout__new(pl);
+ if (!pmu)
+ return -ENOMEM;
+ list_add(&pmu->nd, d);
+ return ret;
+}
+
+/**
+ * get_pmu_counter_layouts - Find counter info of the architecture from
+ * the pmu_layouts table
+ * @pmu_info_list: the list that the new counter info of a pmu is added to.
+ * @table: pmu_layouts table that is searched for counter info.
+ */
+static int get_pmu_counter_layouts(struct list_head *pmu_info_list,
+ const struct pmu_layouts_table
+ *table)
+{
+ LIST_HEAD(list);
+ int ret;
+
+ ret = pmu_layouts_table__for_each_layout(table,
+ metricgroup__add_pmu_layout_callback, &list);
+
+ list_splice(&list, pmu_info_list);
+ return ret;
+}
+
/**
* hw_aware_build_grouping - Build event groupings by reading counter
* requirement of the events and counter available on the system from
@@ -1657,6 +1761,7 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
LIST_HEAD(event_info_list);
size_t bkt;
const struct pmu_events_table *etable = perf_pmu__find_events_table(NULL);
+ const struct pmu_layouts_table *ltable = perf_pmu__find_layouts_table(NULL);

#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
hashmap__for_each_entry(ctx->ids, cur, bkt) {
@@ -1666,9 +1771,13 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,

ret = get_metricgroup_events(id, etable, &event_info_list);
if (ret)
- return ret;
+ goto err_out;
}
+ ret = get_pmu_counter_layouts(&pmu_info_list, ltable);

+err_out:
+ metricgroup__free_event_info(&event_info_list);
+ metricgroup__free_pmu_info(&pmu_info_list);
return ret;
#undef RETURN_IF_NON_ZERO
}
--
2.42.0


2024-04-12 21:09:42

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 09/16] perf stat: Add function to handle special events in hardware-grouping

From: Weilin Wang <[email protected]>

There are some special events like topdown events and TSC that are not
described in pmu-event JSON files. Add support to handle this type of
events. This should be considered as a temporary solution because including
these events in JSON files would be a better solution.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 38 ++++++++++++++++++++++++++++++++++-
1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 04d988ace734..681aacc15787 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -162,6 +162,20 @@ struct metric {

/* Maximum number of counters per PMU*/
#define NR_COUNTERS 16
+/* Special events that are not described in pmu-event JSON files.
+ * topdown-* and TSC use dedicated registers, set as free
+ * counter for grouping purpose
+ */
+enum special_events {
+ TOPDOWN = 0,
+ TSC = 1,
+ SPECIAL_EVENT_MAX,
+};
+
+static const char *const special_event_names[SPECIAL_EVENT_MAX] = {
+ "topdown-",
+ "TSC",
+};

/**
* An event used in a metric. This info is for metric grouping.
@@ -2142,6 +2156,15 @@ static int create_grouping(struct list_head *pmu_info_list,
return ret;
};

+static bool is_special_event(const char *id)
+{
+ for (int i = 0; i < SPECIAL_EVENT_MAX; i++) {
+ if (!strncmp(id, special_event_names[i], strlen(special_event_names[i])))
+ return true;
+ }
+ return false;
+}
+
/**
* hw_aware_build_grouping - Build event groupings by reading counter
* requirement of the events and counter available on the system from
@@ -2166,6 +2189,17 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
hashmap__for_each_entry(ctx->ids, cur, bkt) {
const char *id = cur->pkey;

+ if (is_special_event(id)) {
+ struct metricgroup__event_info *event;
+
+ event = event_info__new(id, "default_core", "0",
+ /*free_counter=*/true);
+ if (!event)
+ goto err_out;
+
+ list_add(&event->nd, &event_info_list);
+ continue;
+ }
ret = get_metricgroup_events(id, etable, &event_info_list);
if (ret)
goto err_out;
@@ -2636,8 +2670,10 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
ret = hw_aware_parse_groups(perf_evlist, pmu, str,
metric_no_threshold, user_requested_cpu_list, system_wide,
/*fake_pmu=*/NULL, metric_events, table);
- if (!ret)
+ if (!ret) {
+ pr_info("Hardware aware grouping completed\n");
return 0;
+ }
}

return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
--
2.42.0


2024-04-12 21:09:54

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 07/16] perf stat: Add functions to create new group and assign events into groups

From: Weilin Wang <[email protected]>

Add struct metricgroup__pmu_group_list to hold the lists of groups from
different PMUs. Each PMU has one separate list.

Add struct metricgroup__group as one node (one group in the grouping
result) of the metricgroup__pmu_group_list. It uses two bitmaps to track
counter availability (gp counters and fixed counters).

Add functions to create group and assign event into the groups based on the
event restrictions (struct metricgroup__event_info) and counter
availability (pmu_info_list and bitmaps). New group is inserted into the
list of groups.

Add functions to handle counter bitmaps. Add functions that do find and
insert operations to handle inserting events into groups.

Add function to fill all bits of one counter bitmap. Add functions to
create new groups when no counter is available in all the existing groups.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 312 ++++++++++++++++++++++++++++++++++
1 file changed, 312 insertions(+)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 78a5410cdc09..88c86664c90c 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -197,6 +197,41 @@ struct metricgroup__pmu_counters {
/** The number of fixed counters in the pmu. */
size_t counters_num_fixed;
};
+/**
+ * A list of event groups for this pmu.
+ * This is updated during the grouping.
+ */
+struct metricgroup__pmu_group_list {
+ struct list_head nd;
+ /** The name of the pmu(/core) the events collected on. */
+ const char *pmu_name;
+ /** The number of gp counters in the pmu(/core). */
+ size_t counters_num_gp;
+ /** The number of fixed counters in the pmu(/core) if applicable. */
+ size_t counters_num_fixed;
+ /** Head to the list of groups using this pmu(/core). */
+ struct list_head group_head;
+};
+/**
+ * This is one node in the metricgroup__pmu_group_list.
+ * It represents one group.
+ */
+struct metricgroup__group {
+ struct list_head nd;
+ /** The bitmaps represent availability of the counters.
+ * They are updated once the corresponding counter is used by
+ * an event (event inserted into the group).
+ */
+ DECLARE_BITMAP(gp_counters, NR_COUNTERS);
+ DECLARE_BITMAP(fixed_counters, NR_COUNTERS);
+ /** Head to the list of event names in this group. */
+ struct list_head event_head;
+};
+
+struct metricgroup__group_events {
+ struct list_head nd;
+ const char *event_name;
+};

/**
* Each group is one node in the group string list.
@@ -1490,6 +1525,34 @@ static int set_counter_bitmap(int pos, unsigned long *bitmap)
return 0;
}

+/**
+ * Returns 0 on success. Finds the last counter that is not used in pmu_counters
+ * and that supports the event, i.e. a bit set in both pmu_counters and event_counters.
+ */
+static int find_counter_bitmap(const unsigned long *pmu_counters,
+ const unsigned long *event_counters,
+ unsigned long *bit)
+{
+ /* It is helpful to assign from the highest bit because some events can
+ * only be collected using GP0-3.
+ */
+ unsigned long find_bit = find_last_and_bit(pmu_counters, event_counters, NR_COUNTERS);
+
+ if (find_bit == NR_COUNTERS)
+ return -ERANGE;
+ *bit = find_bit;
+ return 0;
+}
+
+static int use_counter_bitmap(unsigned long *bitmap,
+ unsigned long find_bit)
+{
+ if (find_bit >= NR_COUNTERS)
+ return -EINVAL;
+ __clear_bit(find_bit, bitmap);
+ return 0;
+}
+
static int parse_fixed_counter(const char *counter,
unsigned long *bitmap,
bool *fixed)
@@ -1562,6 +1625,50 @@ static void metricgroup__pmu_counter__delete(struct metricgroup__pmu_counters *p
free(p);
}

+static void metricgroup__group_event__delete(struct metricgroup__group_events *g)
+{
+ zfree(&g->event_name);
+ free(g);
+}
+
+static void group_event_list_free(struct metricgroup__group *groups)
+{
+ struct metricgroup__group_events *e, *tmp;
+
+ list_for_each_entry_safe(e, tmp, &groups->event_head, nd) {
+ list_del_init(&e->nd);
+ metricgroup__group_event__delete(e);
+ }
+}
+
+static void group_list_free(struct metricgroup__pmu_group_list *groups)
+{
+ struct metricgroup__group *g, *tmp;
+
+ list_for_each_entry_safe(g, tmp, &groups->group_head, nd) {
+ list_del_init(&g->nd);
+ group_event_list_free(g);
+ free(g);
+ }
+}
+
+static void metricgroup__pmu_group_list__delete(struct metricgroup__pmu_group_list *g)
+{
+ group_list_free(g);
+ zfree(&g->pmu_name);
+ free(g);
+}
+
+static void metricgroup__free_group_list(struct list_head *groups)
+{
+ struct metricgroup__pmu_group_list *g, *tmp;
+
+ list_for_each_entry_safe(g, tmp, groups, nd) {
+ list_del_init(&g->nd);
+ metricgroup__pmu_group_list__delete(g);
+ }
+}
+
static void metricgroup__free_event_info(struct list_head
*event_info_list)
{
@@ -1743,6 +1850,207 @@ static int get_pmu_counter_layouts(struct list_head *pmu_info_list,
return ret;
}

+static int fill_counter_bitmap(unsigned long *bitmap, int start, int size)
+{
+ int ret;
+
+ bitmap_zero(bitmap, NR_COUNTERS);
+
+ for (int pos = start; pos < start + size; pos++) {
+ ret = set_counter_bitmap(pos, bitmap);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * Find if there is a counter available for event e in current_group. If a
+ * counter is available, use this counter by clearing its bit in the
+ * corresponding counter bitmap. Otherwise, return error (-ERANGE).
+ */
+static int find_and_set_counters(struct metricgroup__event_info *e,
+ struct metricgroup__group *current_group)
+{
+ int ret;
+ unsigned long find_bit = 0;
+
+ if (e->free_counter)
+ return 0;
+ if (e->fixed_counter) {
+ ret = find_counter_bitmap(current_group->fixed_counters, e->counters,
+ &find_bit);
+ if (ret)
+ return ret;
+ pr_debug("found counter for [event]=%s [e->fixed_counters]=%lu\n",
+ e->name, *current_group->fixed_counters);
+ ret = use_counter_bitmap(current_group->fixed_counters, find_bit);
+ } else {
+ ret = find_counter_bitmap(current_group->gp_counters, e->counters,
+ &find_bit);
+ if (ret)
+ return ret;
+ pr_debug("found counter for [event]=%s [e->gp_counters]=%lu\n",
+ e->name, *current_group->gp_counters);
+ ret = use_counter_bitmap(current_group->gp_counters, find_bit);
+ }
+ return ret;
+}
+
+static int _insert_event(struct metricgroup__event_info *e,
+ struct metricgroup__group *group)
+{
+ struct metricgroup__group_events *event = malloc(sizeof(struct metricgroup__group_events));
+
+ if (!event)
+ return -ENOMEM;
+ event->event_name = strdup(e->name);
+ if (!event->event_name)
+ return -ENOMEM;
+ if (e->fixed_counter)
+ list_add(&event->nd, &group->event_head);
+ else
+ list_add_tail(&event->nd, &group->event_head);
+ return 0;
+}
+
+/**
+ * Insert the new_group node at the end of the group list.
+ */
+static int insert_new_group(struct list_head *head,
+ struct metricgroup__group *new_group,
+ size_t counters_num_gp,
+ size_t counters_num_fixed)
+{
+ INIT_LIST_HEAD(&new_group->event_head);
+ fill_counter_bitmap(new_group->gp_counters, 0, counters_num_gp);
+ fill_counter_bitmap(new_group->fixed_counters, 0, counters_num_fixed);
+ list_add_tail(&new_group->nd, head);
+ return 0;
+}
+
+/**
+ * Insert event e into a group capable of including it.
+ *
+ */
+static int insert_event_to_group(struct metricgroup__event_info *e,
+ struct metricgroup__pmu_group_list *pmu_group_head)
+{
+ struct metricgroup__group *g;
+ int ret;
+ struct list_head *head;
+
+ list_for_each_entry(g, &pmu_group_head->group_head, nd) {
+ ret = find_and_set_counters(e, g);
+ if (!ret) { /* return if successfully find and set counter*/
+ ret = _insert_event(e, g);
+ return ret;
+ }
+ }
+ /*
+ * We were not able to find an existing group to insert this event.
+ * Continue to create a new group and insert the event in it.
+ */
+ {
+ struct metricgroup__group *current_group =
+ zalloc(sizeof(struct metricgroup__group));
+
+ if (!current_group)
+ return -ENOMEM;
+ pr_debug("create_new_group for [event] %s\n", e->name);
+
+ head = &pmu_group_head->group_head;
+ ret = insert_new_group(head, current_group, pmu_group_head->counters_num_gp,
+ pmu_group_head->counters_num_fixed);
+ if (ret)
+ return ret;
+ ret = find_and_set_counters(e, current_group);
+ if (ret)
+ return ret;
+ ret = _insert_event(e, current_group);
+ }
+
+ return ret;
+}
+
+/**
+ * assign_event_grouping - Assign an event into a group. If no existing group
+ * can include it, create a new group and insert the event into it.
+ */
+static int assign_event_grouping(struct metricgroup__event_info *e,
+ struct list_head *pmu_info_list,
+ struct list_head *groups)
+{
+ int ret = 0;
+
+ struct metricgroup__pmu_group_list *g = NULL;
+ struct metricgroup__pmu_group_list *pmu_group_head = NULL;
+
+ list_for_each_entry(g, groups, nd) {
+ if (!strcasecmp(g->pmu_name, e->pmu_name)) {
+ pr_debug("found group for event %s in pmu %s\n", e->name, g->pmu_name);
+ pmu_group_head = g;
+ break;
+ }
+ }
+ if (!pmu_group_head) {
+ struct metricgroup__pmu_counters *p;
+
+ pmu_group_head = malloc(sizeof(struct metricgroup__pmu_group_list));
+ if (!pmu_group_head)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&pmu_group_head->group_head);
+ pr_debug("create new group for event %s in pmu %s\n", e->name, e->pmu_name);
+ pmu_group_head->pmu_name = strdup(e->pmu_name);
+ if (!pmu_group_head->pmu_name)
+ return -ENOMEM;
+ list_for_each_entry(p, pmu_info_list, nd) {
+ if (!strcasecmp(p->name, e->pmu_name)) {
+ pmu_group_head->counters_num_gp = p->counters_num_gp;
+ pmu_group_head->counters_num_fixed = p->counters_num_fixed;
+ break;
+ }
+ }
+ list_add_tail(&pmu_group_head->nd, groups);
+ }
+
+ ret = insert_event_to_group(e, pmu_group_head);
+ return ret;
+}
+
+/**
+ * create_grouping - Create a list of groups and place all the events of
+ * event_info_list into these groups.
+ * @pmu_info_list: the list of PMU units info based on pmu-events data, used for
+ * creating new groups.
+ * @event_info_list: the list of events to be grouped.
+ * @groupings: the list of groups with events placed in.
+ * @modifier: any modifiers added to the events.
+ */
+static int create_grouping(struct list_head *pmu_info_list,
+ struct list_head *event_info_list,
+ struct list_head *groupings __maybe_unused,
+ const char *modifier __maybe_unused)
+{
+ int ret = 0;
+ struct metricgroup__event_info *e;
+ LIST_HEAD(groups);
+ char *bit_buf = malloc(NR_COUNTERS);
+
+ //TODO: for each new core group, we should consider to add events that uses fixed counters
+ list_for_each_entry(e, event_info_list, nd) {
+ bitmap_scnprintf(e->counters, NR_COUNTERS, bit_buf, NR_COUNTERS);
+ pr_debug("Event name %s, [pmu]=%s, [counters]=%s\n", e->name,
+ e->pmu_name, bit_buf);
+ ret = assign_event_grouping(e, pmu_info_list, &groups);
+ if (ret)
+ goto out;
+ }
+out:
+ metricgroup__free_group_list(&groups);
+ return ret;
+};
+
/**
* hw_aware_build_grouping - Build event groupings by reading counter
* requirement of the events and counter available on the system from
@@ -1774,6 +2082,10 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
goto err_out;
}
ret = get_pmu_counter_layouts(&pmu_info_list, ltable);
+ if (ret)
+ goto err_out;
+ ret = create_grouping(&pmu_info_list, &event_info_list, groupings,
+ modifier);

err_out:
metricgroup__free_event_info(&event_info_list);
--
2.42.0


2024-04-12 21:10:18

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 10/16] perf stat: Add function to combine metrics for hardware-grouping

From: Weilin Wang <[email protected]>

This function is very similar to the existing build_combined_expr_ctx().
Should be able to reuse current function instead of adding a new one. Will
fix this later.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 47 ++++++++++++++++++++++++++++++++---
1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 681aacc15787..b9e46dff1e17 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1528,6 +1528,46 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
return ret;
}

+/**
+ * hw_aware_build_combined_expr_ctx - Make an expr_parse_ctx with all !group_events
+ * metric IDs, as the IDs are held in a set,
+ * duplicates will be removed.
+ * @metric_list: List to take metrics from.
+ * @combined: Out argument for result.
+ */
+static int hw_aware_build_combined_expr_ctx(const struct list_head *metric_list,
+ struct expr_parse_ctx **combined)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ struct metric *m;
+ char *dup;
+ int ret;
+
+ *combined = expr__ctx_new();
+ if (!*combined)
+ return -ENOMEM;
+
+ list_for_each_entry(m, metric_list, nd) {
+ hashmap__for_each_entry(m->pctx->ids, cur, bkt) {
+ pr_debug2("metric: %s\n", m->metric_name);
+ dup = strdup(cur->pkey);
+ if (!dup) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+ ret = expr__add_id(*combined, dup);
+ if (ret)
+ goto err_out;
+ }
+ }
+ return 0;
+err_out:
+ expr__ctx_free(*combined);
+ *combined = NULL;
+ return ret;
+}
+
/**
* set_counter_bitmap - The counter bitmap: [0-15].
*/
@@ -1851,8 +1891,7 @@ static int metricgroup__add_pmu_layout_callback(const struct pmu_layout *pl,
* @table: pmu_layouts table that is searched for counter info.
*/
static int get_pmu_counter_layouts(struct list_head *pmu_info_list,
- const struct pmu_layouts_table
- *table)
+ const struct pmu_layouts_table *table)
{
LIST_HEAD(list);
int ret;
@@ -2288,6 +2327,8 @@ static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
*out_evlist = parsed_evlist;
parsed_evlist = NULL;
err_out:
+ parse_events_error__exit(&parse_error);
+ evlist__delete(parsed_evlist);
metricgroup__free_grouping_strs(&groupings);
return ret;
}
@@ -2410,7 +2451,7 @@ static int hw_aware_parse_groups(struct evlist *perf_evlist,
if (!metric_no_merge) {
struct expr_parse_ctx *combined = NULL;

- ret = build_combined_expr_ctx(&metric_list, &combined);
+ ret = hw_aware_build_combined_expr_ctx(&metric_list, &combined);

if (!ret && combined && hashmap__size(combined->ids)) {
ret = hw_aware_parse_ids(fake_pmu, combined,
--
2.42.0


2024-04-12 21:11:31

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 13/16] perf stat: Code refactoring in hardware-grouping

From: Weilin Wang <[email protected]>

Decouple the step to generate final grouping strings out from the
build_grouping step so that we could do single metric grouping and then
merge groups if needed later.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 44 +++++++++++++++++------------------
1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 31036035484c..c6db21a2c340 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1939,6 +1939,7 @@ static int find_and_set_counters(struct metricgroup__event_info *e,
if (e->msr != NULL && current_group->msr != NULL && !strcmp(e->msr, current_group->msr)) {
pr_debug("current group uses the required MSR %s already\n", e->msr);
return -ENOSPC;
+ }
if (e->free_counter)
return 0;
if (e->fixed_counter) {
@@ -2062,7 +2063,8 @@ static int assign_event_grouping(struct metricgroup__event_info *e,

list_for_each_entry(g, groups, nd) {
if (!strcasecmp(g->pmu_name, e->pmu_name)) {
- pr_debug("found group for event %s in pmu %s\n", e->name, g->pmu_name);
+ pr_debug("found group header for event %s in pmu %s\n",
+ e->name, g->pmu_name);
pmu_group_head = g;
break;
}
@@ -2193,26 +2195,22 @@ static int hw_aware_metricgroup__build_event_string(struct list_head *group_strs
*/
static int create_grouping(struct list_head *pmu_info_list,
struct list_head *event_info_list,
- struct list_head *groupings,
- const char *modifier)
+ struct list_head *grouping)
{
int ret = 0;
struct metricgroup__event_info *e;
- LIST_HEAD(groups);
char *bit_buf = malloc(NR_COUNTERS);

- //TODO: for each new core group, we should consider to add events that uses fixed counters
+ //TODO: for each new core group, we could consider to add events that
+ //uses fixed counters
list_for_each_entry(e, event_info_list, nd) {
bitmap_scnprintf(e->counters, NR_COUNTERS, bit_buf, NR_COUNTERS);
pr_debug("Event name %s, [pmu]=%s, [counters]=%s\n", e->name,
e->pmu_name, bit_buf);
ret = assign_event_grouping(e, pmu_info_list, grouping);
if (ret)
- goto out;
+ return ret;
}
- ret = hw_aware_metricgroup__build_event_string(groupings, modifier, &groups);
-out:
- metricgroup__free_group_list(&groups);
return ret;
};

@@ -2233,9 +2231,8 @@ static bool is_special_event(const char *id)
* @groupings: header to the list of final event grouping.
* @modifier: any modifiers added to the events.
*/
-static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
- struct list_head *groupings __maybe_unused,
- const char *modifier __maybe_unused)
+static int hw_aware_build_grouping(struct expr_parse_ctx *ctx,
+ struct list_head *grouping)
{
int ret = 0;
struct hashmap_entry *cur;
@@ -2267,8 +2264,7 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
ret = get_pmu_counter_layouts(&pmu_info_list, ltable);
if (ret)
goto err_out;
- ret = create_grouping(&pmu_info_list, &event_info_list, groupings,
- modifier);
+ ret = create_grouping(&pmu_info_list, &event_info_list, grouping);

err_out:
metricgroup__free_event_info(&event_info_list);
@@ -2314,23 +2310,25 @@ static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
{
struct parse_events_error parse_error;
struct evlist *parsed_evlist;
- LIST_HEAD(groupings);
+ LIST_HEAD(grouping_str);
+ LIST_HEAD(grouping);
struct metricgroup__group_strs *group;
int ret;

*out_evlist = NULL;
- ret = hw_aware_build_grouping(ids, &groupings, modifier);
- if (ret) {
- metricgroup__free_grouping_strs(&groupings);
- return ret;
- }
+ ret = hw_aware_build_grouping(ids, &grouping);
+ if (ret)
+ goto out;
+ ret = hw_aware_metricgroup__build_event_string(&grouping_str, modifier, &grouping);
+ if (ret)
+ goto out;

parsed_evlist = evlist__new();
if (!parsed_evlist) {
ret = -ENOMEM;
goto err_out;
}
- list_for_each_entry(group, &groupings, nd) {
+ list_for_each_entry(group, &grouping_str, nd) {
struct strbuf *events = &group->grouping_str;

pr_debug("Parsing metric events '%s'\n", events->buf);
@@ -2350,7 +2348,9 @@ static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
err_out:
parse_events_error__exit(&parse_error);
evlist__delete(parsed_evlist);
- metricgroup__free_grouping_strs(&groupings);
+out:
+ metricgroup__free_group_list(&grouping);
+ metricgroup__free_grouping_strs(&grouping_str);
return ret;
}

--
2.42.0


2024-04-12 21:11:58

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 02/16] perf stat: Add basic functions for the hardware aware grouping

From: Weilin Wang <[email protected]>

Add the first set of functions for the hardware aware grouping method. Function
hw_aware_parse_groups() is the entry point of this metric grouping method. It
does metric grouping on a combined list of events and will create a list of
grouping strings as final results of the grouping method. These grouping strings
will be used in the same manner as existing metric grouping process.

This method will fall back to normal grouping when hardware aware grouping
return with err so that perf stat still executes and returns with correct
result.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 217 +++++++++++++++++++++++++++++++++-
1 file changed, 216 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 11613450725a..8047f03b2b1f 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -159,6 +159,14 @@ struct metric {
struct evlist *evlist;
};

+/**
+ * Each group is one node in the group string list.
+ */
+struct metricgroup__group_strs {
+ struct list_head nd;
+ struct strbuf grouping_str;
+};
+
static void metric__watchdog_constraint_hint(const char *name, bool foot)
{
static bool violate_nmi_constraint;
@@ -1432,6 +1440,101 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
return ret;
}

+/**
+ * hw_aware_build_grouping - Build event groupings by reading counter
+ * requirement of the events and counter available on the system from
+ * pmu-events.
+ * @ctx: the event identifiers parsed from metrics.
+ * @groupings: header to the list of final event grouping.
+ * @modifier: any modifiers added to the events.
+ */
+static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
+ struct list_head *groupings __maybe_unused,
+ const char *modifier __maybe_unused)
+{
+ int ret = 0;
+
+ pr_debug("This is a placeholder\n");
+ return ret;
+}
+
+static void group_str_free(struct metricgroup__group_strs *g)
+{
+ if (!g)
+ return;
+
+ strbuf_release(&g->grouping_str);
+ free(g);
+}
+
+static void metricgroup__free_grouping_strs(struct list_head
+ *grouping_strs)
+{
+ struct metricgroup__group_strs *g, *tmp;
+
+ list_for_each_entry_safe(g, tmp, grouping_strs, nd) {
+ list_del_init(&g->nd);
+ group_str_free(g);
+ }
+}
+
+/**
+ * hw_aware_parse_ids - Build the event string for the ids and parse them
+ * creating an evlist. The encoded metric_ids are decoded. Events are placed
+ * into groups based on event counter requirements and counter availabilities of
+ * the system.
+ * Note: whether metrics share events (metric_no_merge) is decided by the caller.
+ * @fake_pmu: used when testing metrics not supported by the current CPU.
+ * @ids: the event identifiers parsed from a metric.
+ * @modifier: any modifiers added to the events.
+ * @out_evlist: the created list of events.
+ */
+static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
+ struct expr_parse_ctx *ids, const char *modifier,
+ struct evlist **out_evlist)
+{
+ struct parse_events_error parse_error;
+ struct evlist *parsed_evlist;
+ LIST_HEAD(groupings);
+ struct metricgroup__group_strs *group;
+ int ret;
+
+ *out_evlist = NULL;
+ ret = hw_aware_build_grouping(ids, &groupings, modifier);
+ if (ret) {
+ metricgroup__free_grouping_strs(&groupings);
+ return ret;
+ }
+
+ parsed_evlist = evlist__new();
+ if (!parsed_evlist) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+ list_for_each_entry(group, &groupings, nd) {
+ struct strbuf *events = &group->grouping_str;
+
+ pr_debug("Parsing metric events '%s'\n", events->buf);
+ parse_events_error__init(&parse_error);
+ ret = __parse_events(parsed_evlist, events->buf, /*pmu_filter=*/NULL,
+ &parse_error, fake_pmu, /*warn_if_reordered=*/false);
+ if (ret) {
+ parse_events_error__print(&parse_error, events->buf);
+ goto err_out;
+ }
+ ret = decode_all_metric_ids(parsed_evlist, modifier);
+ if (ret)
+ goto err_out;
+ }
+ *out_evlist = parsed_evlist;
+ parsed_evlist = NULL;
+err_out:
+ parse_events_error__exit(&parse_error);
+ evlist__delete(parsed_evlist);
+ metricgroup__free_grouping_strs(&groupings);
+ return ret;
+}
+
/**
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
@@ -1520,6 +1623,113 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
return ret;
}

+static int hw_aware_parse_groups(struct evlist *perf_evlist,
+ const char *pmu, const char *str,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct perf_pmu *fake_pmu,
+ struct rblist *metric_events_list,
+ const struct pmu_metrics_table *table)
+{
+ struct evlist *combined_evlist = NULL;
+ LIST_HEAD(metric_list);
+ struct metric *m;
+ int ret;
+ bool metric_no_group = false;
+ bool metric_no_merge = false;
+
+ if (metric_events_list->nr_entries == 0)
+ metricgroup__rblist_init(metric_events_list);
+ ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold,
+ user_requested_cpu_list,
+ system_wide, &metric_list, table);
+ if (ret)
+ goto out;
+
+ /* Sort metrics from largest to smallest. */
+ list_sort(NULL, &metric_list, metric_list_cmp);
+
+ if (!metric_no_merge) {
+ struct expr_parse_ctx *combined = NULL;
+
+ ret = build_combined_expr_ctx(&metric_list, &combined);
+
+ if (!ret && combined && hashmap__size(combined->ids)) {
+ ret = hw_aware_parse_ids(fake_pmu, combined,
+ /*modifier=*/NULL,
+ &combined_evlist);
+ }
+
+ if (combined)
+ expr__ctx_free(combined);
+ if (ret)
+ goto out;
+ }
+
+ list_for_each_entry(m, &metric_list, nd) {
+ struct metric_expr *expr;
+ struct metric_event *me;
+ struct evsel **metric_events;
+
+ ret = setup_metric_events(fake_pmu ? "all" : m->pmu, m->pctx->ids,
+ combined_evlist, &metric_events);
+ if (ret) {
+ pr_debug("Cannot resolve IDs for %s: %s\n",
+ m->metric_name, m->metric_expr);
+ goto out;
+ }
+
+ me = metricgroup__lookup(metric_events_list, metric_events[0], true);
+
+ expr = malloc(sizeof(struct metric_expr));
+ if (!expr) {
+ ret = -ENOMEM;
+ free(metric_events);
+ goto out;
+ }
+
+ expr->metric_refs = m->metric_refs;
+ m->metric_refs = NULL;
+ expr->metric_expr = m->metric_expr;
+ if (m->modifier) {
+ char *tmp;
+
+ if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0)
+ expr->metric_name = NULL;
+ else
+ expr->metric_name = tmp;
+ } else {
+ expr->metric_name = strdup(m->metric_name);
+ }
+
+ if (!expr->metric_name) {
+ ret = -ENOMEM;
+ free(metric_events);
+ goto out;
+ }
+ expr->metric_threshold = m->metric_threshold;
+ expr->metric_unit = m->metric_unit;
+ expr->metric_events = metric_events;
+ expr->runtime = m->pctx->sctx.runtime;
+ list_add(&expr->nd, &me->head);
+ }
+
+ if (combined_evlist) {
+ evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries);
+ evlist__delete(combined_evlist);
+ }
+
+ list_for_each_entry(m, &metric_list, nd) {
+ if (m->evlist)
+ evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries);
+ }
+
+out:
+ metricgroup__free_metrics(&metric_list);
+ return ret;
+}
+
static int parse_groups(struct evlist *perf_evlist,
const char *pmu, const char *str,
bool metric_no_group,
@@ -1698,10 +1908,15 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
if (!table)
return -EINVAL;
if (hardware_aware_grouping) {
+ int ret;
pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n");
+ ret = hw_aware_parse_groups(perf_evlist, pmu, str,
+ metric_no_threshold, user_requested_cpu_list, system_wide,
+ /*fake_pmu=*/NULL, metric_events, table);
+ if (!ret)
+ return 0;
}

-
return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
/*fake_pmu=*/NULL, metric_events, table);
--
2.42.0


2024-04-12 21:12:31

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 16/16] perf stat: Add hardware-grouping cmd option to perf stat

From: Weilin Wang <[email protected]>

Add a cmd option to allow the user to choose this new metric grouping method.

$ perf stat -M TopdownL1 -a --hardware-grouping

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/builtin-stat.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c4a5f0984295..8c33e3f8bc80 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1259,6 +1259,8 @@ static struct option stat_options[] = {
"don't try to share events between metrics in a group"),
OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
"disable adding events for the metric threshold calculation"),
+ OPT_BOOLEAN(0, "hardware-grouping", &stat_config.hardware_aware_grouping,
+ "Use hardware aware metric grouping method"),
OPT_BOOLEAN(0, "topdown", &topdown_run,
"measure top-down statistics"),
OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
--
2.42.0


2024-04-12 21:13:30

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 08/16] perf stat: Add build string function and topdown events handling in hardware-grouping

From: Weilin Wang <[email protected]>

Add the function to generate final grouping strings. This function is
very similar to the existing metricgroup__build_event_string() function.
The difference is that the input data includes a list of grouping lists.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 97 +++++++++++++++++++++++++++++++++--
1 file changed, 93 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 88c86664c90c..04d988ace734 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -2018,6 +2018,96 @@ static int assign_event_grouping(struct metricgroup__event_info *e,
return ret;
}

+static int hw_aware_metricgroup__build_event_string(struct list_head *group_strs,
+ const char *modifier,
+ struct list_head *groups)
+{
+ struct metricgroup__pmu_group_list *p;
+ struct metricgroup__group *g;
+ struct metricgroup__group_events *ge;
+ bool no_group = true;
+ int ret = 0;
+
+#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
+
+ list_for_each_entry(p, groups, nd) {
+ list_for_each_entry(g, &p->group_head, nd) {
+ struct strbuf *events;
+ struct metricgroup__group_strs *new_group_str =
+ malloc(sizeof(struct metricgroup__group_strs));
+
+ if (!new_group_str)
+ return -ENOMEM;
+ strbuf_init(&new_group_str->grouping_str, 0);
+ events = &new_group_str->grouping_str;
+ ret = strbuf_addch(events, '{');
+ RETURN_IF_NON_ZERO(ret);
+ no_group = true;
+ list_for_each_entry(ge, &g->event_head, nd) {
+ const char *sep, *rsep, *id = ge->event_name;
+
+ pr_debug("found event %s\n", id);
+
+ /* Separate events with commas and open the group if necessary. */
+ if (!no_group) {
+ ret = strbuf_addch(events, ',');
+ RETURN_IF_NON_ZERO(ret);
+ }
+ /*
+ * Encode the ID as an event string. Add a qualifier for
+ * metric_id that is the original name except with characters
+ * that parse-events can't parse replaced. For example,
+ * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/
+ */
+ sep = strchr(id, '@');
+ if (sep) {
+ ret = strbuf_add(events, id, sep - id);
+ RETURN_IF_NON_ZERO(ret);
+ ret = strbuf_addch(events, '/');
+ RETURN_IF_NON_ZERO(ret);
+ rsep = strrchr(sep, '@');
+ ret = strbuf_add(events, sep + 1, rsep - sep - 1);
+ RETURN_IF_NON_ZERO(ret);
+ ret = strbuf_addstr(events, ",metric-id=");
+ RETURN_IF_NON_ZERO(ret);
+ sep = rsep;
+ } else {
+ sep = strchr(id, ':');
+ if (sep) {
+ ret = strbuf_add(events, id, sep - id);
+ RETURN_IF_NON_ZERO(ret);
+ } else {
+ ret = strbuf_addstr(events, id);
+ RETURN_IF_NON_ZERO(ret);
+ }
+ ret = strbuf_addstr(events, "/metric-id=");
+ RETURN_IF_NON_ZERO(ret);
+ }
+ ret = encode_metric_id(events, id);
+ RETURN_IF_NON_ZERO(ret);
+ ret = strbuf_addstr(events, "/");
+ RETURN_IF_NON_ZERO(ret);
+
+ if (sep) {
+ ret = strbuf_addstr(events, sep + 1);
+ RETURN_IF_NON_ZERO(ret);
+ }
+ if (modifier) {
+ ret = strbuf_addstr(events, modifier);
+ RETURN_IF_NON_ZERO(ret);
+ }
+ no_group = false;
+ }
+ ret = strbuf_addf(events, "}");
+ RETURN_IF_NON_ZERO(ret);
+ pr_debug("events-buf: %s\n", events->buf);
+ list_add_tail(&new_group_str->nd, group_strs);
+ }
+ }
+ return ret;
+#undef RETURN_IF_NON_ZERO
+}
+
/**
* create_grouping - Create a list of groups and place all the events of
* event_info_list into these groups.
@@ -2029,8 +2119,8 @@ static int assign_event_grouping(struct metricgroup__event_info *e,
*/
static int create_grouping(struct list_head *pmu_info_list,
struct list_head *event_info_list,
- struct list_head *groupings __maybe_unused,
- const char *modifier __maybe_unused)
+ struct list_head *groupings,
+ const char *modifier)
{
int ret = 0;
struct metricgroup__event_info *e;
@@ -2046,6 +2136,7 @@ static int create_grouping(struct list_head *pmu_info_list,
if (ret)
goto out;
}
+ ret = hw_aware_metricgroup__build_event_string(groupings, modifier, &groups);
out:
metricgroup__free_group_list(&groups);
return ret;
@@ -2075,8 +2166,6 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
hashmap__for_each_entry(ctx->ids, cur, bkt) {
const char *id = cur->pkey;

- pr_debug("found event %s\n", id);
-
ret = get_metricgroup_events(id, etable, &event_info_list);
if (ret)
goto err_out;
--
2.42.0


2024-04-12 21:13:41

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 11/16] perf stat: Add partial support on MSR in hardware-grouping

From: Weilin Wang <[email protected]>

Add MSR usage into consideration when grouping. Each group can only
include one event that requires one specific MSR. Currently, we only
support events that require one MSR. For some OCR events that have
multiple MSRs in their MSRIndex field, this commit will treat them as
one "large MSR". We're planning to improve this part in the future.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/pmu-events/jevents.py | 4 +++-
tools/perf/pmu-events/pmu-events.h | 6 ++++++
tools/perf/util/metricgroup.c | 27 ++++++++++++++++++++++-----
3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 7cfd86d77fea..66531c2df224 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -54,7 +54,9 @@ _json_event_attributes = [
# Short things in alphabetical order.
'compat', 'deprecated', 'perpkg', 'unit',
# Longer things (the last won't be iterated over during decompress).
- 'long_desc'
+ 'long_desc',
+ # MSRIndex required by the event. NULL if no MSR is required.
+ 'msr'
]

# Attributes that are in pmu_unit_layout.
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 5b42a18693cf..76ec2b431dce 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -54,6 +54,12 @@ struct pmu_event {
const char *unit;
bool perpkg;
bool deprecated;
+ /*
+ * MSR is another resource that restricts grouping. Currently, we
+ * support only MSRIndex 0x3F6 and 0x3F7. TODO: add support for all the
+ * MSRs related to event grouping.
+ */
+ const char *msr;
};

struct pmu_metric {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index b9e46dff1e17..9548654c9f6d 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -193,6 +193,7 @@ struct metricgroup__event_info {
* during the event grouping.
*/
bool free_counter;
+ const char *msr;
/** The counters the event allowed to be collected on. */
DECLARE_BITMAP(counters, NR_COUNTERS);
};
@@ -240,6 +241,7 @@ struct metricgroup__group {
DECLARE_BITMAP(fixed_counters, NR_COUNTERS);
/** Head to the list of event names in this group*/
struct list_head event_head;
+ const char *msr;
};

struct metricgroup__group_events {
@@ -1747,6 +1749,7 @@ static void metricgroup__free_pmu_info(struct list_head *pmu_info_list)
static struct metricgroup__event_info *event_info__new(const char *name,
const char *pmu_name,
const char *counter,
+ const char *msr,
bool free_counter)
{
int ret = 0;
@@ -1764,6 +1767,11 @@ static struct metricgroup__event_info *event_info__new(const char *name,
e->pmu_name = strdup(pmu_name);
if (!e->pmu_name || !e->name)
return NULL;
+ if (msr) {
+ e->msr = strdup(msr);
+ if (!e->msr)
+ return NULL;
+ }
e->free_counter = free_counter;
if (free_counter) {
ret = set_counter_bitmap(0, e->counters);
@@ -1801,7 +1809,8 @@ static int metricgroup__add_metric_event_callback(const struct pmu_event *pe,
if (!strcasecmp(pe->name, d->event_name)) {
if (!pe->counters_list)
return -EINVAL;
- event = event_info__new(d->event_id, pe->pmu, pe->counters_list, /*free_counter=*/false);
+ event = event_info__new(d->event_id, pe->pmu, pe->counters_list,
+ pe->msr, /*free_counter=*/false);
if (!event)
return -ENOMEM;
list_add(&event->nd, d->list);
@@ -1927,7 +1936,9 @@ static int find_and_set_counters(struct metricgroup__event_info *e,
{
int ret;
unsigned long find_bit = 0;
-
+ if (e->msr != NULL && current_group->msr != NULL && !strcmp(e->msr, current_group->msr)) {
+ pr_debug("current group uses the required MSR %s already\n", e->msr);
+ return -ENOSPC;
+ }
if (e->free_counter)
return 0;
if (e->fixed_counter) {
@@ -1964,11 +1975,17 @@ static int _insert_event(struct metricgroup__event_info *e,
list_add(&event->nd, &group->event_head);
else
list_add_tail(&event->nd, &group->event_head);
+ if (e->msr != NULL) {
+ group->msr = strdup(e->msr);
+ pr_debug("Add event %s to group, uses MSR %s\n", e->name, e->msr);
+ if (!group->msr)
+ return -ENOMEM;
+ }
return 0;
}

/**
- * Insert the new_group node at the end of the group list.
+ * Initialize the new group and insert it to the end of the group list.
*/
static int insert_new_group(struct list_head *head,
struct metricgroup__group *new_group,
@@ -2185,7 +2202,7 @@ static int create_grouping(struct list_head *pmu_info_list,
bitmap_scnprintf(e->counters, NR_COUNTERS, bit_buf, NR_COUNTERS);
pr_debug("Event name %s, [pmu]=%s, [counters]=%s\n", e->name,
e->pmu_name, bit_buf);
- ret = assign_event_grouping(e, pmu_info_list, &groups);
+ ret = assign_event_grouping(e, pmu_info_list, grouping);
if (ret)
goto out;
}
@@ -2231,7 +2248,7 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
if (is_special_event(id)) {
struct metricgroup__event_info *event;

- event = event_info__new(id, "default_core", "0",
+ event = event_info__new(id, "default_core", "0", /*msr=*/NULL,
/*free_counter=*/true);
if (!event)
goto err_out;
--
2.42.0


2024-04-12 21:14:01

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 12/16] perf stat: Handle NMI in hardware-grouping

From: Weilin Wang <[email protected]>

Add basic NMI watchdog support in grouping. When the NMI watchdog is
enabled, we reduce the total number of events that can be assigned to one
group by 1. A more efficient solution will be added later.

Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 9548654c9f6d..31036035484c 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1993,6 +1993,10 @@ static int insert_new_group(struct list_head *head,
size_t counters_num_fixed)
{
INIT_LIST_HEAD(&new_group->event_head);
+ if (sysctl__nmi_watchdog_enabled()) {
+ pr_debug("NMI watchdog is enabled. Reduce num of counters by 1\n");
+ counters_num_gp -= 1;
+ }
fill_counter_bitmap(new_group->gp_counters, 0, counters_num_gp);
fill_counter_bitmap(new_group->fixed_counters, 0, counters_num_fixed);
list_add_tail(&new_group->nd, head);
--
2.42.0


2024-04-12 21:15:22

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 14/16] perf stat: Add tool events support in hardware-grouping

From: Weilin Wang <[email protected]>

Add tool events into default_core grouping strings if tool events are
found, so that metrics that use tool events can be correctly calculated.
This step is needed to support TopdownL4-L5.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 49 ++++++++++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index c6db21a2c340..86b6528e5a44 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -763,6 +763,35 @@ static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifie
return ret;
}

+/**
+ * get_tool_event_str - Generate and return a string with all the used tool
+ * event names.
+ */
+static int get_tool_event_str(struct strbuf *events,
+ const bool tool_events[PERF_TOOL_MAX],
+ bool *has_tool_event)
+{
+ int i = 0;
+ int ret;
+
+ perf_tool_event__for_each_event(i) {
+ if (tool_events[i]) {
+ const char *tmp = strdup(perf_tool_event__to_str(i));
+
+ if (!tmp)
+ return -ENOMEM;
+ *has_tool_event = true;
+ ret = strbuf_addstr(events, ",");
+ if (ret)
+ return ret;
+ ret = strbuf_addstr(events, tmp);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
static int metricgroup__build_event_string(struct strbuf *events,
const struct expr_parse_ctx *ctx,
const char *modifier,
@@ -2096,6 +2125,7 @@ static int assign_event_grouping(struct metricgroup__event_info *e,

static int hw_aware_metricgroup__build_event_string(struct list_head *group_strs,
const char *modifier,
+ const bool tool_events[PERF_TOOL_MAX],
struct list_head *groups)
{
struct metricgroup__pmu_group_list *p;
@@ -2103,8 +2133,12 @@ static int hw_aware_metricgroup__build_event_string(struct list_head *group_strs
struct metricgroup__group_events *ge;
bool no_group = true;
int ret = 0;
+ struct strbuf tool_event_str = STRBUF_INIT;
+ bool has_tool_event = false;

#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
+ ret = get_tool_event_str(&tool_event_str, tool_events, &has_tool_event);
+ RETURN_IF_NON_ZERO(ret);

list_for_each_entry(p, groups, nd) {
list_for_each_entry(g, &p->group_head, nd) {
@@ -2176,6 +2210,12 @@ static int hw_aware_metricgroup__build_event_string(struct list_head *group_strs
}
ret = strbuf_addf(events, "}");
RETURN_IF_NON_ZERO(ret);
+
+ if (has_tool_event) {
+ ret = strbuf_addstr(events, tool_event_str.buf);
+ RETURN_IF_NON_ZERO(ret);
+ }
+
pr_debug("events-buf: %s\n", events->buf);
list_add_tail(&new_group_str->nd, group_strs);
}
@@ -2261,6 +2301,7 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx,
if (ret)
goto err_out;
}
+
ret = get_pmu_counter_layouts(&pmu_info_list, ltable);
if (ret)
goto err_out;
@@ -2306,6 +2347,7 @@ static void metricgroup__free_grouping_strs(struct list_head
*/
static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
struct expr_parse_ctx *ids, const char *modifier,
+ const bool tool_events[PERF_TOOL_MAX],
struct evlist **out_evlist)
{
struct parse_events_error parse_error;
@@ -2319,7 +2361,8 @@ static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
ret = hw_aware_build_grouping(ids, &grouping);
if (ret)
goto out;
- ret = hw_aware_metricgroup__build_event_string(&grouping_str, modifier, &grouping);
+ ret = hw_aware_metricgroup__build_event_string(&grouping_str, modifier,
+ tool_events, &grouping);
if (ret)
goto out;

@@ -2454,6 +2497,7 @@ static int hw_aware_parse_groups(struct evlist *perf_evlist,
struct evlist *combined_evlist = NULL;
LIST_HEAD(metric_list);
struct metric *m;
+ bool tool_events[PERF_TOOL_MAX] = {false};
int ret;
bool metric_no_group = false;
bool metric_no_merge = false;
@@ -2472,11 +2516,14 @@ static int hw_aware_parse_groups(struct evlist *perf_evlist,
if (!metric_no_merge) {
struct expr_parse_ctx *combined = NULL;

+ find_tool_events(&metric_list, tool_events);
+
ret = hw_aware_build_combined_expr_ctx(&metric_list, &combined);

if (!ret && combined && hashmap__size(combined->ids)) {
ret = hw_aware_parse_ids(fake_pmu, combined,
/*modifier=*/NULL,
+ tool_events,
&combined_evlist);
}

--
2.42.0


2024-04-12 21:15:45

by Wang, Weilin

[permalink] [raw]
Subject: [RFC PATCH v5 15/16] perf stat: use tool event helper function in metricgroup__build_event_string

From: Weilin Wang <[email protected]>

We want to include all used tool events in each event group to ensure
event sharing, which helps improve multiplexing.

This change updates parse_ids and metricgroup__build_event_string to use
the get_tool_event_str helper function to generate the tool event string,
instead of inserting temporary tool event ids and generating the string
from those tool event ids.

Signed-off-by: Weilin Wang <[email protected]>
---
tools/perf/util/metricgroup.c | 61 +++++++++++++----------------------
1 file changed, 22 insertions(+), 39 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 86b6528e5a44..39746d18f078 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -774,6 +774,18 @@ static int get_tool_event_str(struct strbuf *events,
int i = 0;
int ret;

+ /*
+ * We may fail to share events between metrics because a tool
+ * event isn't present in one metric. For example, a ratio of
+ * cache misses doesn't need duration_time but the same events
+ * may be used for a misses per second. Events without sharing
+ * implies multiplexing, that is best avoided, so place
+ * all tool events in every group.
+ * This function helps place all tool events in every group by
+ * generating the tool event strbuf that to be added in event
+ * group strings.
+ */
+
perf_tool_event__for_each_event(i) {
if (tool_events[i]) {
const char *tmp = strdup(perf_tool_event__to_str(i));
@@ -795,15 +807,18 @@ static int get_tool_event_str(struct strbuf *events,
static int metricgroup__build_event_string(struct strbuf *events,
const struct expr_parse_ctx *ctx,
const char *modifier,
- bool group_events)
+ bool group_events,
+ const bool tool_events[PERF_TOOL_MAX])
{
struct hashmap_entry *cur;
size_t bkt;
bool no_group = true, has_tool_events = false;
- bool tool_events[PERF_TOOL_MAX] = {false};
int ret = 0;
+ struct strbuf tool_event_str = STRBUF_INIT;

#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
+ ret = get_tool_event_str(&tool_event_str, tool_events, &has_tool_events);
+ RETURN_IF_NON_ZERO(ret);

hashmap__for_each_entry(ctx->ids, cur, bkt) {
const char *sep, *rsep, *id = cur->pkey;
@@ -814,8 +829,6 @@ static int metricgroup__build_event_string(struct strbuf *events,
/* Always move tool events outside of the group. */
ev = perf_tool_event__from_str(id);
if (ev != PERF_TOOL_NONE) {
- has_tool_events = true;
- tool_events[ev] = true;
continue;
}
/* Separate events with commas and open the group if necessary. */
@@ -879,19 +892,8 @@ static int metricgroup__build_event_string(struct strbuf *events,
RETURN_IF_NON_ZERO(ret);
}
if (has_tool_events) {
- int i;
-
- perf_tool_event__for_each_event(i) {
- if (tool_events[i]) {
- if (!no_group) {
- ret = strbuf_addch(events, ',');
- RETURN_IF_NON_ZERO(ret);
- }
- no_group = false;
- ret = strbuf_addstr(events, perf_tool_event__to_str(i));
- RETURN_IF_NON_ZERO(ret);
- }
- }
+ ret = strbuf_addstr(events, tool_event_str.buf);
+ RETURN_IF_NON_ZERO(ret);
}

return ret;
@@ -2421,32 +2423,13 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,

*out_evlist = NULL;
if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
- bool added_event = false;
- int i;
/*
- * We may fail to share events between metrics because a tool
- * event isn't present in one metric. For example, a ratio of
- * cache misses doesn't need duration_time but the same events
- * may be used for a misses per second. Events without sharing
- * implies multiplexing, that is best avoided, so place
- * all tool events in every group.
- *
- * Also, there may be no ids/events in the expression parsing
+ * There may be no ids/events in the expression parsing
* context because of constant evaluation, e.g.:
* event1 if #smt_on else 0
* Add a tool event to avoid a parse error on an empty string.
*/
- perf_tool_event__for_each_event(i) {
- if (tool_events[i]) {
- char *tmp = strdup(perf_tool_event__to_str(i));
-
- if (!tmp)
- return -ENOMEM;
- ids__insert(ids->ids, tmp);
- added_event = true;
- }
- }
- if (!added_event && hashmap__size(ids->ids) == 0) {
+ if (hashmap__size(ids->ids) == 0) {
char *tmp = strdup("duration_time");

if (!tmp)
@@ -2455,7 +2438,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
}
}
ret = metricgroup__build_event_string(&events, ids, modifier,
- group_events);
+ group_events, tool_events);
if (ret)
return ret;

--
2.42.0


2024-04-17 03:49:43

by Ian Rogers

[permalink] [raw]
Subject: Re: [RFC PATCH v5 01/16] perf stat: Add new field in stat_config to enable hardware aware grouping.

On Fri, Apr 12, 2024 at 2:08 PM <[email protected]> wrote:
>
> From: Weilin Wang <[email protected]>
>
> Hardware counter and event information could be used to help creating event
> groups that better utilize hardware counters and improve multiplexing.
>
> Reviewed-by: Ian Rogers <[email protected]>
> Signed-off-by: Weilin Wang <[email protected]>
> ---
> tools/perf/builtin-stat.c | 5 +++++
> tools/perf/util/metricgroup.c | 5 +++++
> tools/perf/util/metricgroup.h | 1 +
> tools/perf/util/stat.h | 1 +
> 4 files changed, 12 insertions(+)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 6bba1a89d030..c4a5f0984295 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -2106,6 +2106,7 @@ static int add_default_attributes(void)
> stat_config.metric_no_threshold,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> + stat_config.hardware_aware_grouping,
> &stat_config.metric_events);
> }
>
> @@ -2139,6 +2140,7 @@ static int add_default_attributes(void)
> stat_config.metric_no_threshold,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> + stat_config.hardware_aware_grouping,
> &stat_config.metric_events);
> }
>
> @@ -2173,6 +2175,7 @@ static int add_default_attributes(void)
> /*metric_no_threshold=*/true,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> + stat_config.hardware_aware_grouping,
> &stat_config.metric_events) < 0)
> return -1;
> }
> @@ -2214,6 +2217,7 @@ static int add_default_attributes(void)
> /*metric_no_threshold=*/true,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> + stat_config.hardware_aware_grouping,
> &stat_config.metric_events) < 0)
> return -1;
>
> @@ -2748,6 +2752,7 @@ int cmd_stat(int argc, const char **argv)
> stat_config.metric_no_threshold,
> stat_config.user_requested_cpu_list,
> stat_config.system_wide,
> + stat_config.hardware_aware_grouping,
> &stat_config.metric_events);
>
> zfree(&metrics);
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 79ef6095ab28..11613450725a 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -1690,12 +1690,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
> bool metric_no_threshold,
> const char *user_requested_cpu_list,
> bool system_wide,
> + bool hardware_aware_grouping,
> struct rblist *metric_events)
> {
> const struct pmu_metrics_table *table = pmu_metrics_table__find();
>
> if (!table)
> return -EINVAL;
> + if (hardware_aware_grouping) {
> + pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n");
> + }

nit: single line if statements shouldn't have curlies:
https://www.kernel.org/doc/html/v6.8/process/coding-style.html#placing-braces-and-spaces

Thanks,
Ian

> +
>
> return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
> metric_no_threshold, user_requested_cpu_list, system_wide,
> diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
> index d5325c6ec8e1..779f6ede1b51 100644
> --- a/tools/perf/util/metricgroup.h
> +++ b/tools/perf/util/metricgroup.h
> @@ -77,6 +77,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
> bool metric_no_threshold,
> const char *user_requested_cpu_list,
> bool system_wide,
> + bool hardware_aware_grouping,
> struct rblist *metric_events);
> int metricgroup__parse_groups_test(struct evlist *evlist,
> const struct pmu_metrics_table *table,
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index d6e5c8787ba2..fd7a187551bd 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -87,6 +87,7 @@ struct perf_stat_config {
> bool metric_no_group;
> bool metric_no_merge;
> bool metric_no_threshold;
> + bool hardware_aware_grouping;
> bool stop_read_counter;
> bool iostat_run;
> char *user_requested_cpu_list;
> --
> 2.42.0
>

2024-04-17 05:36:13

by Ian Rogers

[permalink] [raw]
Subject: Re: [RFC PATCH v5 05/16] perf stat: Add functions to set counter bitmaps for hardware-grouping method

On Fri, Apr 12, 2024 at 2:08 PM <[email protected]> wrote:
>
> From: Weilin Wang <[email protected]>
>
> Add metricgroup__event_info data structure to represent an event in the
> metric grouping context; the list of counters and the PMU name an event
> should be collected with.
>
> Add functions to parse event counter info from pmu-events and generate a
> list of metricgroup__event_info data to prepare grouping.
>
> Reviewed-by: Ian Rogers <[email protected]>
> Signed-off-by: Weilin Wang <[email protected]>
> ---
> tools/perf/util/metricgroup.c | 219 +++++++++++++++++++++++++++++++++-
> 1 file changed, 216 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 8047f03b2b1f..1a7ac17f7ae1 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -24,6 +24,7 @@
> #include <linux/list_sort.h>
> #include <linux/string.h>
> #include <linux/zalloc.h>
> +#include <linux/bitmap.h>
> #include <perf/cpumap.h>
> #include <subcmd/parse-options.h>
> #include <api/fs/fs.h>
> @@ -159,6 +160,29 @@ struct metric {
> struct evlist *evlist;
> };
>
> +/* Maximum number of counters per PMU*/
> +#define NR_COUNTERS 16
> +
> +/**
> + * An event used in a metric. This info is for metric grouping.
> + */
> +struct metricgroup__event_info {
> + struct list_head nd;
> + /** The name of the event. */
> + const char *name;
> + /** The name of the pmu the event be collected on. */
> + const char *pmu_name;
> + /** The event uses fixed counter. */
> + bool fixed_counter;
> + /**
> + * The event uses special counters that we consider that as free counter
> + * during the event grouping.
> + */
> + bool free_counter;
> + /** The counters the event allowed to be collected on. */
> + DECLARE_BITMAP(counters, NR_COUNTERS);
> +};
> +
> /**
> * Each group is one node in the group string list.
> */
> @@ -1440,6 +1464,181 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
> return ret;
> }
>
> +/**
> + * set_counter_bitmap - The counter bitmap: [0-15].
> + */
> +static int set_counter_bitmap(int pos, unsigned long *bitmap)
> +{
> + if (pos >= NR_COUNTERS || pos < 0)
> + return -EINVAL;
> + __set_bit(pos, bitmap);
> + return 0;
> +}
> +
> +static int parse_fixed_counter(const char *counter,
> + unsigned long *bitmap,
> + bool *fixed)
> +{
> + int ret = -ENOENT;
> + /* TODO: this pattern maybe different on some other platforms */
> + const char *pattern = "Fixed counter ";
> + int pos = 0;
> +
> + if (!strncmp(counter, pattern, strlen(pattern))) {
> + pos = atoi(counter + strlen(pattern));
> + ret = set_counter_bitmap(pos, bitmap);
> + if (ret)
> + return ret;
> + *fixed = true;
> + return 0;
> + }
> + return ret;
> +}
> +
> +/**
> + * parse_counter - Parse event counter info from pmu-events and set up bitmap
> + * accordingly.
> + *
> + * @counter: counter info string to be parsed.
> + * @bitmap: bitmap to set based on counter info parsed.
> + * @fixed: is set to true if the event uses fixed counter.
> + */
> +static int parse_counter(const char *counter,
> + unsigned long *bitmap,
> + bool *fixed)
> +{
> + int ret = 0;
> + char *p;
> + char *tok;
> + int pos = 0;
> +
> + ret = parse_fixed_counter(counter, bitmap, fixed);
> + /* ret==0 means matched with fixed counter */
> + if (ret == 0)
> + return ret;
> +
> + p = strdup(counter);
> + if (!p)
> + return -ENOMEM;
> + tok = strtok(p, ",");
> + if (!tok)
> + return -ENOENT;
> +
> + while (tok) {
> + pos = atoi(tok);
> + ret = set_counter_bitmap(pos, bitmap);
> + if (ret)
> + return ret;
> + tok = strtok(NULL, ",");
> + }
> + return 0;
> +}
> +
> +static struct metricgroup__event_info *event_info__new(const char *name,
> + const char *pmu_name,
> + const char *counter,
> + bool free_counter)
> +{
> + int ret = 0;
> + char *bit_buf = malloc(NR_COUNTERS);
> + bool fixed_counter = false;
> + struct metricgroup__event_info *e;
> +
> + e = zalloc(sizeof(*e));
> + if (!e)
> + return NULL;
> + if (!pmu_name)
> + pmu_name = "core";
> +
> + e->name = strdup(name);
> + e->pmu_name = strdup(pmu_name);
> + if (!e->pmu_name || !e->name)
> + return NULL;
> + e->free_counter = free_counter;
> + if (free_counter) {
> + ret = set_counter_bitmap(0, e->counters);
> + if (ret)
> + return NULL;
> + } else {
> + ret = parse_counter(counter, e->counters, &fixed_counter);
> + if (ret)
> + return NULL;
> + e->fixed_counter = fixed_counter;
> + }
> +
> + bitmap_scnprintf(e->counters, NR_COUNTERS, bit_buf, NR_COUNTERS);
> + pr_debug("Event %s requires pmu %s counter: %s bitmap %s, [pmu=%s]\n",
> + e->name, e->pmu_name, counter, bit_buf, pmu_name);
> +
> + return e;
> +}
> +
> +struct metricgroup__add_metric_event_data {
> + struct list_head *list;
> + /* pure event name, exclude umask and other info*/
> + const char *event_name;
> + /* event name and umask if applicable*/
> + const char *event_id;
> +};
> +
> +static int metricgroup__add_metric_event_callback(const struct pmu_event *pe,
> + const struct pmu_events_table *table __maybe_unused,
> + void *data)
> +{
> + struct metricgroup__event_info *event;
> + struct metricgroup__add_metric_event_data *d = data;
> +
> + if (!strcasecmp(pe->name, d->event_name)) {
> + event = event_info__new(d->event_id, pe->pmu, pe->counter, /*free_counter=*/false);

I'm seeing a build error at this patch:
```
CC /tmp/perf/util/metricgroup.o
util/metricgroup.c: In function ‘metricgroup__add_metric_event_callback’:
util/metricgroup.c:1592:65: error: ‘const struct pmu_event’ has no
member named ‘counter’
1592 | event = event_info__new(d->event_id, pe->pmu,
pe->counter, /*free_counter=*/false);
| ^~
util/metricgroup.c: In function ‘get_metricgroup_events’:
util/metricgroup.c:1633:23: error: implicit declaration of function
‘pmu_events_table_for_each_event’; did you mean
‘pmu_events_table__for_each_event’?
[-Werror=implicit-function-declaration]
1633 | ret = pmu_events_table_for_each_event(table,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| pmu_events_table__for_each_event
cc1: all warnings being treated as errors
```
It must be resolved by a later patch.

Thanks,
Ian

> + if (!event)
> + return -ENOMEM;
> + list_add(&event->nd, d->list);
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * get_metricgroup_events - Find counter requirement of events from the
> + * pmu_events table
> + * @full_id: the full event identifiers.
> + * @table: pmu_events table that is searched for event data.
> + * @event_info_list: the list that the new event counter info added to.
> + */
> +static int get_metricgroup_events(const char *full_id,
> + const struct pmu_events_table *table,
> + struct list_head *event_info_list)
> +{
> + LIST_HEAD(list);
> + int ret = 0;
> + const char *id;
> + const char *rsep, *sep = strchr(full_id, '@');
> +
> + if (sep) {
> + rsep = strchr(full_id, ',');
> + id = strndup(sep + 1, rsep - sep - 1);
> + if (!id) {
> + ret = -ENOMEM;
> + goto out;
> + }
> + } else {
> + id = full_id;
> + }
> + {
> + struct metricgroup__add_metric_event_data data = {
> + .list = &list,
> + .event_name = id,
> + .event_id = full_id,
> + };
> + ret = pmu_events_table_for_each_event(table,
> + metricgroup__add_metric_event_callback, &data);
> + }
> +
> +out:
> + list_splice(&list, event_info_list);
> + return ret;
> +}
> +
> /**
> * hw_aware_build_grouping - Build event groupings by reading counter
> * requirement of the events and counter available on the system from
> @@ -1453,9 +1652,25 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
> const char *modifier __maybe_unused)
> {
> int ret = 0;
> + struct hashmap_entry *cur;
> + LIST_HEAD(pmu_info_list);
> + LIST_HEAD(event_info_list);
> + size_t bkt;
> + const struct pmu_events_table *etable = perf_pmu__find_events_table(NULL);
> +
> +#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
> + hashmap__for_each_entry(ctx->ids, cur, bkt) {
> + const char *id = cur->pkey;
> +
> + pr_debug("found event %s\n", id);
> +
> + ret = get_metricgroup_events(id, etable, &event_info_list);
> + if (ret)
> + return ret;
> + }
>
> - pr_debug("This is a placeholder\n");
> return ret;
> +#undef RETURN_IF_NON_ZERO
> }
>
> static void group_str_free(struct metricgroup__group_strs *g)
> @@ -1529,8 +1744,6 @@ static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
> *out_evlist = parsed_evlist;
> parsed_evlist = NULL;
> err_out:
> - parse_events_error__exit(&parse_error);
> - evlist__delete(parsed_evlist);
> metricgroup__free_grouping_strs(&groupings);
> return ret;
> }
> --
> 2.42.0
>

2024-04-17 06:13:36

by Ian Rogers

[permalink] [raw]
Subject: Re: [RFC PATCH v5 09/16] perf stat: Add function to handle special events in hardware-grouping

On Fri, Apr 12, 2024 at 2:08 PM <[email protected]> wrote:
>
> From: Weilin Wang <[email protected]>
>
> There are some special events like topdown events and TSC that are not
> described in pmu-event JSON files. Add support to handle this type of
> events. This should be considered as a temporary solution because including
> these events in JSON files would be a better solution.

What is going to happen in the other cases: uncore, software, and core
events for other architectures? Topdown is annoyingly special but the
MSR events should be similar to tool events like duration_time, in
that they don't have grouping restrictions.

Thanks,
Ian

>
> Signed-off-by: Weilin Wang <[email protected]>
> ---
> tools/perf/util/metricgroup.c | 38 ++++++++++++++++++++++++++++++++++-
> 1 file changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 04d988ace734..681aacc15787 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -162,6 +162,20 @@ struct metric {
>
> /* Maximum number of counters per PMU*/
> #define NR_COUNTERS 16
> +/* Special events that are not described in pmu-event JSON files.
> + * topdown-* and TSC use dedicated registers, set as free
> + * counter for grouping purpose
> + */
> +enum special_events {
> + TOPDOWN = 0,
> + TSC = 1,
> + SPECIAL_EVENT_MAX,
> +};
> +
> +static const char *const special_event_names[SPECIAL_EVENT_MAX] = {
> + "topdown-",
> + "TSC",
> +};
>
> /**
> * An event used in a metric. This info is for metric grouping.
> @@ -2142,6 +2156,15 @@ static int create_grouping(struct list_head *pmu_info_list,
> return ret;
> };
>
> +static bool is_special_event(const char *id)
> +{
> + for (int i = 0; i < SPECIAL_EVENT_MAX; i++) {
> + if (!strncmp(id, special_event_names[i], strlen(special_event_names[i])))
> + return true;
> + }
> + return false;
> +}
> +
> /**
> * hw_aware_build_grouping - Build event groupings by reading counter
> * requirement of the events and counter available on the system from
> @@ -2166,6 +2189,17 @@ static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
> hashmap__for_each_entry(ctx->ids, cur, bkt) {
> const char *id = cur->pkey;
>
> + if (is_special_event(id)) {
> + struct metricgroup__event_info *event;
> +
> + event = event_info__new(id, "default_core", "0",
> + /*free_counter=*/true);
> + if (!event)
> + goto err_out;
> +
> + list_add(&event->nd, &event_info_list);
> + continue;
> + }
> ret = get_metricgroup_events(id, etable, &event_info_list);
> if (ret)
> goto err_out;
> @@ -2636,8 +2670,10 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
> ret = hw_aware_parse_groups(perf_evlist, pmu, str,
> metric_no_threshold, user_requested_cpu_list, system_wide,
> /*fake_pmu=*/NULL, metric_events, table);
> - if (!ret)
> + if (!ret) {
> + pr_info("Hardware aware grouping completed\n");
> return 0;
> + }
> }
>
> return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
> --
> 2.42.0
>

2024-04-17 06:36:34

by Ian Rogers

[permalink] [raw]
Subject: Re: [RFC PATCH v5 15/16] perf stat: use tool event helper function in metricgroup__build_event_string

On Fri, Apr 12, 2024 at 2:08 PM <[email protected]> wrote:
>
> From: Weilin Wang <[email protected]>
>
> We want to include all used tool events in each event group to ensure
> sharing events so that help improve multiplexing.
>
> This change updates parse_id and metricgroupg__build_event_string to use

nit: typo in metricgroup__build_event_string

> the get_tool_event_str helper function to generate tool event string
> instead of inserting temporary tool event ids and generate string from
> the tool event ids.
>
> Signed-off-by: Weilin Wang <[email protected]>
> ---
> tools/perf/util/metricgroup.c | 61 +++++++++++++----------------------
> 1 file changed, 22 insertions(+), 39 deletions(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 86b6528e5a44..39746d18f078 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -774,6 +774,18 @@ static int get_tool_event_str(struct strbuf *events,
> int i = 0;
> int ret;
>
> + /*
> + * We may fail to share events between metrics because a tool
> + * event isn't present in one metric. For example, a ratio of
> + * cache misses doesn't need duration_time but the same events
> + * may be used for a misses per second. Events without sharing
> + * implies multiplexing, that is best avoided, so place
> + * all tool events in every group.
> + * This function helps place all tool events in every group by
> + * generating the tool event strbuf that to be added in event
> + * group strings.
> + */
> +
> perf_tool_event__for_each_event(i) {
> if (tool_events[i]) {
> const char *tmp = strdup(perf_tool_event__to_str(i));
> @@ -795,15 +807,18 @@ static int get_tool_event_str(struct strbuf *events,
> static int metricgroup__build_event_string(struct strbuf *events,
> const struct expr_parse_ctx *ctx,
> const char *modifier,
> - bool group_events)
> + bool group_events,
> + const bool tool_events[PERF_TOOL_MAX])
> {
> struct hashmap_entry *cur;
> size_t bkt;
> bool no_group = true, has_tool_events = false;
> - bool tool_events[PERF_TOOL_MAX] = {false};
> int ret = 0;
> + struct strbuf tool_event_str = STRBUF_INIT;
>
> #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
> + ret = get_tool_event_str(&tool_event_str, tool_events, &has_tool_events);
> + RETURN_IF_NON_ZERO(ret);
>
> hashmap__for_each_entry(ctx->ids, cur, bkt) {
> const char *sep, *rsep, *id = cur->pkey;
> @@ -814,8 +829,6 @@ static int metricgroup__build_event_string(struct strbuf *events,
> /* Always move tool events outside of the group. */
> ev = perf_tool_event__from_str(id);
> if (ev != PERF_TOOL_NONE) {
> - has_tool_events = true;
> - tool_events[ev] = true;
> continue;
> }
> /* Separate events with commas and open the group if necessary. */
> @@ -879,19 +892,8 @@ static int metricgroup__build_event_string(struct strbuf *events,
> RETURN_IF_NON_ZERO(ret);
> }
> if (has_tool_events) {
> - int i;
> -
> - perf_tool_event__for_each_event(i) {
> - if (tool_events[i]) {
> - if (!no_group) {
> - ret = strbuf_addch(events, ',');
> - RETURN_IF_NON_ZERO(ret);
> - }
> - no_group = false;
> - ret = strbuf_addstr(events, perf_tool_event__to_str(i));
> - RETURN_IF_NON_ZERO(ret);
> - }
> - }
> + ret = strbuf_addstr(events, tool_event_str.buf);
> + RETURN_IF_NON_ZERO(ret);
> }
>
> return ret;
> @@ -2421,32 +2423,13 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
>
> *out_evlist = NULL;
> if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
> - bool added_event = false;
> - int i;
> /*
> - * We may fail to share events between metrics because a tool
> - * event isn't present in one metric. For example, a ratio of
> - * cache misses doesn't need duration_time but the same events
> - * may be used for a misses per second. Events without sharing
> - * implies multiplexing, that is best avoided, so place
> - * all tool events in every group.
> - *
> - * Also, there may be no ids/events in the expression parsing
> + * There may be no ids/events in the expression parsing
> * context because of constant evaluation, e.g.:
> * event1 if #smt_on else 0
> * Add a tool event to avoid a parse error on an empty string.
> */
> - perf_tool_event__for_each_event(i) {
> - if (tool_events[i]) {
> - char *tmp = strdup(perf_tool_event__to_str(i));
> -
> - if (!tmp)
> - return -ENOMEM;
> - ids__insert(ids->ids, tmp);
> - added_event = true;
> - }
> - }
> - if (!added_event && hashmap__size(ids->ids) == 0) {
> + if (hashmap__size(ids->ids) == 0) {
> char *tmp = strdup("duration_time");
>
> if (!tmp)
> @@ -2455,7 +2438,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> }
> }
> ret = metricgroup__build_event_string(&events, ids, modifier,
> - group_events);
> + group_events, tool_events);
> if (ret)
> return ret;
>
> --
> 2.42.0
>

2024-04-17 16:21:47

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [RFC PATCH v5 01/16] perf stat: Add new field in stat_config to enable hardware aware grouping.

On Tue, Apr 16, 2024 at 08:49:21PM -0700, Ian Rogers wrote:
> On Fri, Apr 12, 2024 at 2:08 PM <[email protected]> wrote:
> >
> > From: Weilin Wang <[email protected]>
> >
> > Hardware counter and event information could be used to help creating event
> > groups that better utilize hardware counters and improve multiplexing.
> >
> > Reviewed-by: Ian Rogers <[email protected]>
> > Signed-off-by: Weilin Wang <[email protected]>
> > ---
> > tools/perf/builtin-stat.c | 5 +++++
> > tools/perf/util/metricgroup.c | 5 +++++
> > tools/perf/util/metricgroup.h | 1 +
> > tools/perf/util/stat.h | 1 +
> > 4 files changed, 12 insertions(+)
> >
> > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> > index 6bba1a89d030..c4a5f0984295 100644
> > --- a/tools/perf/builtin-stat.c
> > +++ b/tools/perf/builtin-stat.c
> > @@ -2106,6 +2106,7 @@ static int add_default_attributes(void)
> > stat_config.metric_no_threshold,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > + stat_config.hardware_aware_grouping,
> > &stat_config.metric_events);
> > }
> >
> > @@ -2139,6 +2140,7 @@ static int add_default_attributes(void)
> > stat_config.metric_no_threshold,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > + stat_config.hardware_aware_grouping,
> > &stat_config.metric_events);
> > }
> >
> > @@ -2173,6 +2175,7 @@ static int add_default_attributes(void)
> > /*metric_no_threshold=*/true,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > + stat_config.hardware_aware_grouping,
> > &stat_config.metric_events) < 0)
> > return -1;
> > }
> > @@ -2214,6 +2217,7 @@ static int add_default_attributes(void)
> > /*metric_no_threshold=*/true,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > + stat_config.hardware_aware_grouping,
> > &stat_config.metric_events) < 0)
> > return -1;
> >
> > @@ -2748,6 +2752,7 @@ int cmd_stat(int argc, const char **argv)
> > stat_config.metric_no_threshold,
> > stat_config.user_requested_cpu_list,
> > stat_config.system_wide,
> > + stat_config.hardware_aware_grouping,
> > &stat_config.metric_events);
> >
> > zfree(&metrics);
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index 79ef6095ab28..11613450725a 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -1690,12 +1690,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
> > bool metric_no_threshold,
> > const char *user_requested_cpu_list,
> > bool system_wide,
> > + bool hardware_aware_grouping,
> > struct rblist *metric_events)
> > {
> > const struct pmu_metrics_table *table = pmu_metrics_table__find();
> >
> > if (!table)
> > return -EINVAL;
> > + if (hardware_aware_grouping) {
> > + pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n");
> > + }
>
> nit: single line if statements shouldn't have curlies:
> https://www.kernel.org/doc/html/v6.8/process/coding-style.html#placing-braces-and-spaces

Fixed this while applying this patch.

- Arnaldo

2024-04-17 16:24:17

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [RFC PATCH v5 02/16] perf stat: Add basic functions for the hardware aware grouping

On Tue, Apr 16, 2024 at 09:56:38PM -0700, Ian Rogers wrote:
> On Fri, Apr 12, 2024 at 2:08 PM <[email protected]> wrote:
> >
> > From: Weilin Wang <[email protected]>
> >
> > Add the first set of functions for the hardware aware grouping method. Function
> > hw_aware_parse_groups() is the entry point of this metric grouping method. It
> > does metric grouping on a combined list of events and will create a list of
> > grouping strings as final results of the grouping method. These grouping strings
> > will be used in the same manner as existing metric grouping process.
> >
> > This method will fall back to normal grouping when hardware aware grouping
> > return with err so that perf stat still executes and returns with correct
> > result.
> >
> > Signed-off-by: Weilin Wang <[email protected]>
>
> Reviewed-by: Ian Rogers <[email protected]>

This one isn't applying:

⬢[acme@toolbox perf-tools-next]$ git log -10 --oneline tools/perf/util/metricgroup.c
b6226e7e6daa823d (HEAD -> perf-tools-next) perf stat: Add new field in stat_config to enable hardware aware grouping
4b5ee6db2d3cd624 perf metrics: Remove the "No_group" metric group
97b6b4ac1c5dd42a perf metrics: Fix segv for metrics with no events
d4be39cadef0dbba perf metrics: Fix metric matching
a59fb796a36bb6c2 perf metrics: Compute unmerged uncore metrics individually
7bbe8f0071dfa23f perf tools: Fix calloc() arguments to address error introduced in gcc-14
e2b005d6ec0e738d perf metrics: Avoid segv if default metricgroup isn't set
54409997d4b99ab6 perf metric: "Compat" supports regular expression matching identifiers
4000519eb0c66594 perf pmu-events: Add extra underscore to function names
1c0e47956a8e1109 perf metrics: Sort the Default metricgroup
⬢[acme@toolbox perf-tools-next]$

So just the first patch was applied so far, I'll push what I have to
tmp.perf-tools-next and then later to perf-tools-next so that this work
can be continued from there.

- Arnaldo

> > ---
> > tools/perf/util/metricgroup.c | 217 +++++++++++++++++++++++++++++++++-
> > 1 file changed, 216 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index 11613450725a..8047f03b2b1f 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -159,6 +159,14 @@ struct metric {
> > struct evlist *evlist;
> > };
> >
> > +/**
> > + * Each group is one node in the group string list.
> > + */
> > +struct metricgroup__group_strs {
> > + struct list_head nd;
> > + struct strbuf grouping_str;
> > +};
> > +
> > static void metric__watchdog_constraint_hint(const char *name, bool foot)
> > {
> > static bool violate_nmi_constraint;
> > @@ -1432,6 +1440,101 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
> > return ret;
> > }
> >
> > +/**
> > + * hw_aware_build_grouping - Build event groupings by reading counter
> > + * requirement of the events and counter available on the system from
> > + * pmu-events.
> > + * @ctx: the event identifiers parsed from metrics.
> > + * @groupings: header to the list of final event grouping.
> > + * @modifier: any modifiers added to the events.
> > + */
> > +static int hw_aware_build_grouping(struct expr_parse_ctx *ctx __maybe_unused,
> > + struct list_head *groupings __maybe_unused,
> > + const char *modifier __maybe_unused)
> > +{
> > + int ret = 0;
> > +
> > + pr_debug("This is a placeholder\n");
> > + return ret;
> > +}
> > +
> > +static void group_str_free(struct metricgroup__group_strs *g)
> > +{
> > + if (!g)
> > + return;
> > +
> > + strbuf_release(&g->grouping_str);
> > + free(g);
> > +}
> > +
> > +static void metricgroup__free_grouping_strs(struct list_head
> > + *grouping_strs)
> > +{
> > + struct metricgroup__group_strs *g, *tmp;
> > +
> > + list_for_each_entry_safe(g, tmp, grouping_strs, nd) {
> > + list_del_init(&g->nd);
> > + group_str_free(g);
> > + }
> > +}
> > +
> > +/**
> > + * hw_aware_parse_ids - Build the event string for the ids and parse them
> > + * creating an evlist. The encoded metric_ids are decoded. Events are placed
> > + * into groups based on event counter requirements and counter availabilities of
> > + * the system.
> > + * @metric_no_merge: is metric sharing explicitly disabled.
> > + * @fake_pmu: used when testing metrics not supported by the current CPU.
> > + * @ids: the event identifiers parsed from a metric.
> > + * @modifier: any modifiers added to the events.
> > + * @out_evlist: the created list of events.
> > + */
> > +static int hw_aware_parse_ids(struct perf_pmu *fake_pmu,
> > + struct expr_parse_ctx *ids, const char *modifier,
> > + struct evlist **out_evlist)
> > +{
> > + struct parse_events_error parse_error;
> > + struct evlist *parsed_evlist;
> > + LIST_HEAD(groupings);
> > + struct metricgroup__group_strs *group;
> > + int ret;
> > +
> > + *out_evlist = NULL;
> > + ret = hw_aware_build_grouping(ids, &groupings, modifier);
> > + if (ret) {
> > + metricgroup__free_grouping_strs(&groupings);
> > + return ret;
> > + }
> > +
> > + parsed_evlist = evlist__new();
> > + if (!parsed_evlist) {
> > + ret = -ENOMEM;
> > + goto err_out;
> > + }
> > + list_for_each_entry(group, &groupings, nd) {
> > + struct strbuf *events = &group->grouping_str;
> > +
> > + pr_debug("Parsing metric events '%s'\n", events->buf);
> > + parse_events_error__init(&parse_error);
> > + ret = __parse_events(parsed_evlist, events->buf, /*pmu_filter=*/NULL,
> > + &parse_error, fake_pmu, /*warn_if_reordered=*/false);
> > + if (ret) {
> > + parse_events_error__print(&parse_error, events->buf);
> > + goto err_out;
> > + }
> > + ret = decode_all_metric_ids(parsed_evlist, modifier);
> > + if (ret)
> > + goto err_out;
> > + }
> > + *out_evlist = parsed_evlist;
> > + parsed_evlist = NULL;
> > +err_out:
> > + parse_events_error__exit(&parse_error);
> > + evlist__delete(parsed_evlist);
> > + metricgroup__free_grouping_strs(&groupings);
> > + return ret;
> > +}
> > +
> > /**
> > * parse_ids - Build the event string for the ids and parse them creating an
> > * evlist. The encoded metric_ids are decoded.
> > @@ -1520,6 +1623,113 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> > return ret;
> > }
> >
> > +static int hw_aware_parse_groups(struct evlist *perf_evlist,
> > + const char *pmu, const char *str,
> > + bool metric_no_threshold,
> > + const char *user_requested_cpu_list,
> > + bool system_wide,
> > + struct perf_pmu *fake_pmu,
> > + struct rblist *metric_events_list,
> > + const struct pmu_metrics_table *table)
> > +{
> > + struct evlist *combined_evlist = NULL;
> > + LIST_HEAD(metric_list);
> > + struct metric *m;
> > + int ret;
> > + bool metric_no_group = false;
> > + bool metric_no_merge = false;
> > +
> > + if (metric_events_list->nr_entries == 0)
> > + metricgroup__rblist_init(metric_events_list);
> > + ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold,
> > + user_requested_cpu_list,
> > + system_wide, &metric_list, table);
> > + if (ret)
> > + goto out;
> > +
> > + /* Sort metrics from largest to smallest. */
> > + list_sort(NULL, &metric_list, metric_list_cmp);
> > +
> > + if (!metric_no_merge) {
> > + struct expr_parse_ctx *combined = NULL;
> > +
> > + ret = build_combined_expr_ctx(&metric_list, &combined);
> > +
> > + if (!ret && combined && hashmap__size(combined->ids)) {
> > + ret = hw_aware_parse_ids(fake_pmu, combined,
> > + /*modifier=*/NULL,
> > + &combined_evlist);
> > + }
> > +
> > + if (combined)
> > + expr__ctx_free(combined);
> > + if (ret)
> > + goto out;
> > + }
> > +
> > + list_for_each_entry(m, &metric_list, nd) {
> > + struct metric_expr *expr;
> > + struct metric_event *me;
> > + struct evsel **metric_events;
> > +
> > + ret = setup_metric_events(fake_pmu ? "all" : m->pmu, m->pctx->ids,
> > + combined_evlist, &metric_events);
> > + if (ret) {
> > + pr_debug("Cannot resolve IDs for %s: %s\n",
> > + m->metric_name, m->metric_expr);
> > + goto out;
> > + }
> > +
> > + me = metricgroup__lookup(metric_events_list, metric_events[0], true);
> > +
> > + expr = malloc(sizeof(struct metric_expr));
> > + if (!expr) {
> > + ret = -ENOMEM;
> > + free(metric_events);
> > + goto out;
> > + }
> > +
> > + expr->metric_refs = m->metric_refs;
> > + m->metric_refs = NULL;
> > + expr->metric_expr = m->metric_expr;
> > + if (m->modifier) {
> > + char *tmp;
> > +
> > + if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0)
> > + expr->metric_name = NULL;
> > + else
> > + expr->metric_name = tmp;
> > + } else {
> > + expr->metric_name = strdup(m->metric_name);
> > + }
> > +
> > + if (!expr->metric_name) {
> > + ret = -ENOMEM;
> > + free(metric_events);
> > + goto out;
> > + }
> > + expr->metric_threshold = m->metric_threshold;
> > + expr->metric_unit = m->metric_unit;
> > + expr->metric_events = metric_events;
> > + expr->runtime = m->pctx->sctx.runtime;
> > + list_add(&expr->nd, &me->head);
> > + }
> > +
> > + if (combined_evlist) {
> > + evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries);
> > + evlist__delete(combined_evlist);
> > + }
> > +
> > + list_for_each_entry(m, &metric_list, nd) {
> > + if (m->evlist)
> > + evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries);
> > + }
> > +
> > +out:
> > + metricgroup__free_metrics(&metric_list);
> > + return ret;
> > +}
> > +
> > static int parse_groups(struct evlist *perf_evlist,
> > const char *pmu, const char *str,
> > bool metric_no_group,
> > @@ -1698,10 +1908,15 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
> > if (!table)
> > return -EINVAL;
> > if (hardware_aware_grouping) {
> > + int ret;
>
> nit: there should be a '\n' between variables and code,
> scripts/checkpatch.pl should catch this.
>
> > pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n");
> > + ret = hw_aware_parse_groups(perf_evlist, pmu, str,
> > + metric_no_threshold, user_requested_cpu_list, system_wide,
> > + /*fake_pmu=*/NULL, metric_events, table);
> > + if (!ret)
> > + return 0;
> > }
> >
> > -
>
> nit: unneeded whitespace change.
>
> Thanks,
> Ian
>
> > return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
> > metric_no_threshold, user_requested_cpu_list, system_wide,
> > /*fake_pmu=*/NULL, metric_events, table);
> > --
> > 2.42.0
> >