2021-01-14 03:42:51

by Leo Yan

[permalink] [raw]
Subject: [PATCH v3 0/5] perf c2c: Code refactoring

This patch series is for several minor code refactoring, which is
extracted from the patch series "perf c2c: Sort cacheline with all
loads" [1].

There has a known issue for Arm SPE store operations and Arm SPE is
the only consumer for soring with all loads, this is the reason in this
series drops the changes for dimensions and sorting, and only extracts
the patches related with code refactoring. So this series doesn't
introduce any functionality change.

The patches have been tested on x86_64 and compared the result before
and after applying the patches, and confirmed no difference for the
output result.

Changes from v2:
* Changed to use static functions to replace macros (Namhyung);
* Added Jiri's Ack tags in the unchanged patches;
* Minor improvement in the commit logs.

[1] https://lore.kernel.org/patchwork/cover/1353064/


Leo Yan (5):
perf c2c: Rename for shared cache line stats
perf c2c: Refactor hist entry validation
perf c2c: Refactor display filter
perf c2c: Refactor node display
perf c2c: Add local variables for output metrics

tools/perf/builtin-c2c.c | 173 ++++++++++++++++++++++++---------------
1 file changed, 105 insertions(+), 68 deletions(-)

--
2.25.1


2021-01-14 03:43:00

by Leo Yan

[permalink] [raw]
Subject: [PATCH v3 2/5] perf c2c: Refactor hist entry validation

This patch has no any functionality changes but refactors hist entry
validation for cache line resorting.

It renames function "valid_hitm_or_store()" to "is_valid_hist_entry()",
changes return type from integer type to bool type. In the function,
it uses switch-case instead of ternary operators, which is easier
to extend for more display types.

Signed-off-by: Leo Yan <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
---
tools/perf/builtin-c2c.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2d0c71300dbf..bc2ee84298ff 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1888,16 +1888,32 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
return he->filtered == 0;
}

-static inline int valid_hitm_or_store(struct hist_entry *he)
+static inline bool is_valid_hist_entry(struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
- bool has_hitm;
+ bool has_record = false;

c2c_he = container_of(he, struct c2c_hist_entry, he);
- has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm :
- c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm :
- c2c_he->stats.rmt_hitm;
- return has_hitm || c2c_he->stats.store;
+
+ /* It's a valid entry if contains stores */
+ if (c2c_he->stats.store)
+ return true;
+
+ switch (c2c.display) {
+ case DISPLAY_LCL:
+ has_record = !!c2c_he->stats.lcl_hitm;
+ break;
+ case DISPLAY_RMT:
+ has_record = !!c2c_he->stats.rmt_hitm;
+ break;
+ case DISPLAY_TOT:
+ has_record = !!c2c_he->stats.tot_hitm;
+ break;
+ default:
+ break;
+ }
+
+ return has_record;
}

static void set_node_width(struct c2c_hist_entry *c2c_he, int len)
@@ -1951,7 +1967,7 @@ static int filter_cb(struct hist_entry *he, void *arg __maybe_unused)

calc_width(c2c_he);

- if (!valid_hitm_or_store(he))
+ if (!is_valid_hist_entry(he))
he->filtered = HIST_FILTER__C2C;

return 0;
--
2.25.1

2021-01-14 03:43:07

by Leo Yan

[permalink] [raw]
Subject: [PATCH v3 4/5] perf c2c: Refactor node display

The macro DISPLAY_HITM() is used to calculate HITM percentage introduced
by every node and it's shown for the node info.

This patch introduces the static function display_metrics() to replace
the macro, and the parameters are refined for passing the metric's
statistic value and the sum value.

Signed-off-by: Leo Yan <[email protected]>
---
tools/perf/builtin-c2c.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index de1b804d31be..fe811b8e02bb 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1048,6 +1048,19 @@ empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return 0;
}

+static int display_metrics(struct perf_hpp *hpp, u32 val, u32 sum)
+{
+ int ret;
+
+ if ((sum) > 0)
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
+ percent((val), (sum)));
+ else
+ ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+
+ return ret;
+}
+
static int
node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
struct hist_entry *he)
@@ -1091,29 +1104,23 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
advance_hpp(hpp, ret);

- #define DISPLAY_HITM(__h) \
- if (c2c_he->stats.__h> 0) { \
- ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \
- percent(stats->__h, c2c_he->stats.__h));\
- } else { \
- ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \
- }
-
switch (c2c.display) {
case DISPLAY_RMT:
- DISPLAY_HITM(rmt_hitm);
+ ret = display_metrics(hpp, stats->rmt_hitm,
+ c2c_he->stats.rmt_hitm);
break;
case DISPLAY_LCL:
- DISPLAY_HITM(lcl_hitm);
+ ret = display_metrics(hpp, stats->lcl_hitm,
+ c2c_he->stats.lcl_hitm);
break;
case DISPLAY_TOT:
- DISPLAY_HITM(tot_hitm);
+ ret = display_metrics(hpp, stats->tot_hitm,
+ c2c_he->stats.tot_hitm);
+ break;
default:
break;
}

- #undef DISPLAY_HITM
-
advance_hpp(hpp, ret);

if (c2c_he->stats.store > 0) {
--
2.25.1

2021-01-14 03:45:54

by Leo Yan

[permalink] [raw]
Subject: [PATCH v3 5/5] perf c2c: Add local variables for output metrics

This patch adds several local variables:

"cl_output": pointer for outputting single cache line metrics;
"output_str": pointer for outputting cache line metrics;
"sort_str": pointer to the sorting metrics.

This can improve readability for the code and it's more flexible when
later extend to different strings for the output metrics.

Signed-off-by: Leo Yan <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
---
tools/perf/builtin-c2c.c | 59 ++++++++++++++++++++++------------------
1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index fe811b8e02bb..1c5f0f8f483a 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2206,16 +2206,17 @@ static void print_pareto(FILE *out)
struct perf_hpp_list hpp_list;
struct rb_node *nd;
int ret;
+ const char *cl_output;
+
+ cl_output = "cl_num,"
+ "cl_rmt_hitm,"
+ "cl_lcl_hitm,"
+ "cl_stores_l1hit,"
+ "cl_stores_l1miss,"
+ "dcacheline";

perf_hpp_list__init(&hpp_list);
- ret = hpp_list__parse(&hpp_list,
- "cl_num,"
- "cl_rmt_hitm,"
- "cl_lcl_hitm,"
- "cl_stores_l1hit,"
- "cl_stores_l1miss,"
- "dcacheline",
- NULL);
+ ret = hpp_list__parse(&hpp_list, cl_output, NULL);

if (WARN_ONCE(ret, "failed to setup sort entries\n"))
return;
@@ -2759,6 +2760,7 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_END()
};
int err = 0;
+ const char *output_str, *sort_str = NULL;

argc = parse_options(argc, argv, options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -2835,24 +2837,29 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}

- c2c_hists__reinit(&c2c.hists,
- "cl_idx,"
- "dcacheline,"
- "dcacheline_node,"
- "dcacheline_count,"
- "percent_hitm,"
- "tot_hitm,lcl_hitm,rmt_hitm,"
- "tot_recs,"
- "tot_loads,"
- "tot_stores,"
- "stores_l1hit,stores_l1miss,"
- "ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,lcl_hitm,"
- "ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt",
- c2c.display == DISPLAY_TOT ? "tot_hitm" :
- c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
- );
+ output_str = "cl_idx,"
+ "dcacheline,"
+ "dcacheline_node,"
+ "dcacheline_count,"
+ "percent_hitm,"
+ "tot_hitm,lcl_hitm,rmt_hitm,"
+ "tot_recs,"
+ "tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt";
+
+ if (c2c.display == DISPLAY_TOT)
+ sort_str = "tot_hitm";
+ else if (c2c.display == DISPLAY_RMT)
+ sort_str = "rmt_hitm";
+ else if (c2c.display == DISPLAY_LCL)
+ sort_str = "lcl_hitm";
+
+ c2c_hists__reinit(&c2c.hists, output_str, sort_str);

ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");

--
2.25.1