This patch set supports per-sample freq/CPU%/CORE_BUSY% print in perf
report -D and --stdio.
To print this information, the perf.data file must have been recorded with
group read, using the special events cycles, ref-cycles, msr/tsc/,
msr/aperf/ or msr/mperf/.
- Freq (MHz): The frequency during the sample interval. Needs the cycles
and ref-cycles events.
- CPU%: CPU utilization during the sample interval. Needs ref-cycles and
msr/tsc/ events.
- CORE_BUSY%: actual percent performance (APERF/MPERF%) during the
sample interval. Needs msr/aperf/ and msr/mperf/ events.
Here is an example:
$ perf record -e
'{cycles,ref-cycles,msr/tsc/,msr/mperf/,msr/aperf/}:S' ~/tchain_edit
$ perf report --stdio --group --show-freq-perf
Overhead FREQ MHz CPU% CORE_BUSY%
Command Shared Object Symbol
........................................ ......... ..... ..........
........... ................ ......................
99.54% 99.54% 99.53% 99.53% 99.53% 2301 96 99
tchain_edit tchain_edit [.] f3
0.20% 0.20% 0.20% 0.20% 0.20% 2301 98 99
tchain_edit tchain_edit [.] f2
0.05% 0.05% 0.05% 0.05% 0.05% 2300 98 99
tchain_edit [kernel.vmlinux] [k] read_tsc
Changes since V1:
- Save cpu max freq to header when recording
- Read cpu max freq and msr type from header when reporting
Changes since V2:
- Introduce generic FEAT for CPU related data stored
- Make cpu max freq and msr type part of perf_session_env
- rename cpu_u to cpu_util
- Don't save the sample value in perf_sample, and discard the new iterator.
Calculate the freq_perf_info in the add_entry_cb callback instead.
- Introduce symbol_conf.freq_perf_type for related hpp column visibility
Changes since V3:
- Add an identifier 'tag' for CPU attributes, e.g. max frequency.
- Add backpointers to evlist for env, and to evsel for evlist.
- Use a bitmask for freq_perf_type
- Replace macros with functions to calculate freq, cpu_util and core_busy
- Move all calculation code under the symbol_conf.show_freq_perf condition.
Changes since V4:
- Store cpu attributes id as tag and more readable cpu_attr
Arnaldo Carvalho de Melo (1):
perf evsel: Add a backpointer to the evlist a evsel is in
Kan Liang (6):
perf,tools: introduce generic FEAT for CPU attributes
perf,tools: read msr pmu type from header.
perf,tools: rename perf_session_env and add backpointer to evlist
perf,tools: Dump per-sample freq/CPU%/CORE_BUSY% in report -D
perf,tools: calculate and save freq/CPU%/CORE_BUSY% in he_stat
perf,tools: Show freq/CPU%/CORE_BUSY% in perf report --stdio
tools/perf/Documentation/perf-report.txt | 12 ++++++
tools/perf/arch/common.c | 4 +-
tools/perf/arch/common.h | 2 +-
tools/perf/builtin-report.c | 56 +++++++++++++++++++++++++
tools/perf/ui/browser.h | 4 +-
tools/perf/ui/browsers/header.c | 2 +-
tools/perf/ui/browsers/hists.c | 12 +++---
tools/perf/ui/hist.c | 71 +++++++++++++++++++++++++++++---
tools/perf/util/cpumap.c | 32 ++++++++++++++
tools/perf/util/cpumap.h | 1 +
tools/perf/util/evlist.c | 2 +
tools/perf/util/evlist.h | 1 +
tools/perf/util/evsel.c | 2 +
tools/perf/util/evsel.h | 4 ++
tools/perf/util/header.c | 61 +++++++++++++++++++++++++++
tools/perf/util/header.h | 17 +++++++-
tools/perf/util/hist.h | 7 +++-
tools/perf/util/session.c | 36 +++++++++++++---
tools/perf/util/session.h | 64 ++++++++++++++++++++++++++++
tools/perf/util/sort.c | 3 ++
tools/perf/util/sort.h | 3 ++
tools/perf/util/symbol.c | 4 +-
tools/perf/util/symbol.h | 16 +++++--
23 files changed, 385 insertions(+), 31 deletions(-)
--
1.8.3.1
From: Kan Liang <[email protected]>
This patch introduces a generic FEAT for CPU attributes. For this patch
set, we only need the cpu max frequency, but it can easily be extended to
support other CPU attributes.
The cpu max frequency is read from the first online cpu.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/cpumap.c | 32 +++++++++++++++++++++++++++
tools/perf/util/cpumap.h | 1 +
tools/perf/util/header.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/header.h | 12 ++++++++++
4 files changed, 102 insertions(+)
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3667e21..ef7e57e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -499,3 +499,35 @@ int cpu__setup_cpunode_map(void)
closedir(dir1);
return 0;
}
+
+/*
+ * Read the maximum frequency of a CPU from sysfs.
+ *
+ * The CPU is the first number listed in devices/system/cpu/online; if that
+ * file cannot be opened, cpu0 is used.
+ *
+ * Returns the value found in cpufreq/cpuinfo_max_freq, or 0 when sysfs is
+ * not mounted or either file cannot be parsed.
+ */
+u64 get_cpu_max_freq(void)
+{
+	const char *mnt;
+	char path[PATH_MAX];
+	unsigned long long max_freq;
+	FILE *fp;
+	int cpu = 0;
+	int ret;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		return 0;
+
+	snprintf(path, sizeof(path), "%s/devices/system/cpu/online", mnt);
+	fp = fopen(path, "r");
+	if (fp) {
+		/*
+		 * Only the leading CPU number is needed.  "%d" matches the
+		 * int it is stored into, which "%u" did not.
+		 */
+		ret = fscanf(fp, "%d", &cpu);
+		fclose(fp);
+		if (ret != 1 || cpu < 0)
+			return 0;
+	}
+
+	snprintf(path, sizeof(path),
+		 "%s/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", mnt, cpu);
+	fp = fopen(path, "r");
+	if (!fp)
+		return 0;
+	/*
+	 * Scan into an unsigned long long: "%lu" is only correct where u64
+	 * happens to be unsigned long.
+	 */
+	ret = fscanf(fp, "%llu", &max_freq);
+	fclose(fp);
+
+	return (ret == 1) ? (u64)max_freq : 0;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 0af9cec..8fd2bd6 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -58,6 +58,7 @@ int max_node_num;
int *cpunode_map;
int cpu__setup_cpunode_map(void);
+u64 get_cpu_max_freq(void);
static inline int cpu__max_node(void)
{
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 179b2bd..274792b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -862,6 +862,23 @@ write_it:
return do_write_string(fd, buffer);
}
+/*
+ * Emit the CPU attributes feature section: a u32 attribute tag followed by
+ * its u64 value.  Only PERF_HEADER_CPU_MAX_FREQ is written.
+ *
+ * Returns the negative do_write() result if writing the tag fails,
+ * otherwise the result of writing the value.
+ */
+static int write_cpu_attributes(int fd, struct perf_header *h __maybe_unused,
+				struct perf_evlist *evlist __maybe_unused)
+{
+	u32 tag = PERF_HEADER_CPU_MAX_FREQ;
+	u64 max_freq;
+	int err = do_write(fd, &tag, sizeof(tag));
+
+	if (err < 0)
+		return err;
+
+	max_freq = get_cpu_max_freq();
+	return do_write(fd, &max_freq, sizeof(max_freq));
+}
+
static int write_branch_stack(int fd __maybe_unused,
struct perf_header *h __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
@@ -1154,6 +1171,11 @@ static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
fprintf(fp, "# cpuid : %s\n", ph->env.cpuid);
}
+/* Print the CPU max frequency stored in the header environment. */
+static void print_cpu_attributes(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
+{
+	/* u64 is not unsigned long everywhere, so widen explicitly for "%llu". */
+	fprintf(fp, "# CPU attributes: max frequency = %llu KHz\n",
+		(unsigned long long)ph->env.cpu.freq);
+}
+
static void print_branch_stack(struct perf_header *ph __maybe_unused,
int fd __maybe_unused, FILE *fp)
{
@@ -1467,6 +1489,40 @@ static int process_cpuid(struct perf_file_section *section __maybe_unused,
return ph->env.cpuid ? 0 : -ENOMEM;
}
+/*
+ * Parse the CPU attributes feature section: a u32 tag followed by a u64
+ * value, which is stored in ph->env.cpu_attr[tag].
+ *
+ * Returns 0 on success, -1 on a short read or an unknown tag.
+ */
+static int process_cpu_attributes(struct perf_file_section *section __maybe_unused,
+				  struct perf_header *ph, int fd,
+				  void *data __maybe_unused)
+{
+	ssize_t ret;
+	u32 tag_id;
+	u64 nr;
+
+	ret = readn(fd, &tag_id, sizeof(tag_id));
+	if (ret != sizeof(tag_id))
+		return -1;
+
+	/*
+	 * Swap the tag in place.  Storing the swapped value in 'nr' left
+	 * tag_id in foreign byte order for the range check and the array
+	 * index below.
+	 */
+	if (ph->needs_swap)
+		tag_id = bswap_32(tag_id);
+
+	/* tag_id indexes env.cpu_attr[], so reject anything out of range. */
+	if (tag_id >= PERF_HEADER_CPU_ATTR_MAX) {
+		pr_debug("The number of cpu attributes is not expected. "
+			 "You may need to upgrade the perf tool.\n");
+		return -1;
+	}
+
+	ret = readn(fd, &nr, sizeof(nr));
+	if (ret != sizeof(nr))
+		return -1;
+
+	if (ph->needs_swap)
+		nr = bswap_64(nr);
+
+	ph->env.cpu_attr[tag_id] = nr;
+
+	return 0;
+}
+
static int process_total_mem(struct perf_file_section *section __maybe_unused,
struct perf_header *ph, int fd,
void *data __maybe_unused)
@@ -1899,6 +1955,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
FEAT_OPP(HEADER_AUXTRACE, auxtrace),
+ FEAT_OPP(HEADER_CPU_ATTR, cpu_attributes),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 9b53b65..053b42d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -31,6 +31,7 @@ enum {
HEADER_PMU_MAPPINGS,
HEADER_GROUP_DESC,
HEADER_AUXTRACE,
+ HEADER_CPU_ATTR,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
@@ -66,6 +67,11 @@ struct perf_header;
int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd);
+enum perf_header_cpu_attr { /* tags naming the values kept in the CPU attributes header section */
+ PERF_HEADER_CPU_MAX_FREQ = 0, /* CPU max frequency, as returned by get_cpu_max_freq() */
+ PERF_HEADER_CPU_ATTR_MAX, /* number of tags; sizes the cpu_attr[] array of the session env */
+};
+
struct perf_session_env {
char *hostname;
char *os_release;
@@ -89,6 +95,12 @@ struct perf_session_env {
char *sibling_threads;
char *numa_nodes;
char *pmu_mappings;
+ union {
+ u64 cpu_attr[PERF_HEADER_CPU_ATTR_MAX];
+ struct {
+ u64 freq;
+ } cpu;
+ };
};
struct perf_header {
--
1.8.3.1
From: Kan Liang <[email protected]>
Get msr pmu type when processing pmu_mappings
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/header.c | 3 +++
tools/perf/util/header.h | 1 +
2 files changed, 4 insertions(+)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 274792b..9db7c57 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1793,6 +1793,9 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
/* include a NULL character at the end */
strbuf_add(&sb, "", 1);
+ if (!strcmp(name, "msr"))
+ ph->env.msr_pmu_type = type;
+
free(name);
pmu_num--;
}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 053b42d..275ca49 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -101,6 +101,7 @@ struct perf_session_env {
u64 freq;
} cpu;
};
+ unsigned int msr_pmu_type;
};
struct perf_header {
--
1.8.3.1
From: Kan Liang <[email protected]>
Rename perf_session_env to perf_env.
Add a backpointer to evlist, so we can easily access env when processing
something where we have an evsel or evlist.
Suggested-by: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/arch/common.c | 4 ++--
tools/perf/arch/common.h | 2 +-
tools/perf/ui/browser.h | 4 ++--
tools/perf/ui/browsers/header.c | 2 +-
tools/perf/ui/browsers/hists.c | 12 ++++++------
tools/perf/util/evlist.h | 1 +
tools/perf/util/header.c | 1 +
tools/perf/util/header.h | 4 ++--
tools/perf/util/hist.h | 4 ++--
tools/perf/util/session.c | 2 +-
tools/perf/util/symbol.c | 4 ++--
tools/perf/util/symbol.h | 4 ++--
12 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index b7bb42c..b00dfd92 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -128,7 +128,7 @@ static const char *normalize_arch(char *arch)
return arch;
}
-static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
+static int perf_session_env__lookup_binutils_path(struct perf_env *env,
const char *name,
const char **path)
{
@@ -206,7 +206,7 @@ out_error:
return -1;
}
-int perf_session_env__lookup_objdump(struct perf_session_env *env)
+int perf_session_env__lookup_objdump(struct perf_env *env)
{
/*
* For live mode, env->arch will be NULL and we can use
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index ede246e..20176df 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,6 @@
extern const char *objdump_path;
-int perf_session_env__lookup_objdump(struct perf_session_env *env);
+int perf_session_env__lookup_objdump(struct perf_env *env);
#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 52be871..f3cef56 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -61,8 +61,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text);
bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text);
int ui_browser__input_window(const char *title, const char *text, char *input,
const char *exit_msg, int delay_sec);
-struct perf_session_env;
-int tui__header_window(struct perf_session_env *env);
+struct perf_env;
+int tui__header_window(struct perf_env *env);
void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c
index f106817..edbeaaf 100644
--- a/tools/perf/ui/browsers/header.c
+++ b/tools/perf/ui/browsers/header.c
@@ -91,7 +91,7 @@ static int ui__list_menu(int argc, char * const argv[])
return list_menu__run(&menu);
}
-int tui__header_window(struct perf_session_env *env)
+int tui__header_window(struct perf_env *env)
{
int i, argc = 0;
char **argv;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 10c7ec0..cf86f2d 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -26,7 +26,7 @@ struct hist_browser {
struct map_symbol *selection;
struct hist_browser_timer *hbt;
struct pstack *pstack;
- struct perf_session_env *env;
+ struct perf_env *env;
int print_seq;
bool show_dso;
bool show_headers;
@@ -1214,7 +1214,7 @@ static int hist_browser__dump(struct hist_browser *browser)
static struct hist_browser *hist_browser__new(struct hists *hists,
struct hist_browser_timer *hbt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
struct hist_browser *browser = zalloc(sizeof(*browser));
@@ -1695,7 +1695,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
bool left_exits,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
struct hists *hists = evsel__hists(evsel);
struct hist_browser *browser = hist_browser__new(hists, hbt, env);
@@ -2016,7 +2016,7 @@ struct perf_evsel_menu {
struct perf_evsel *selection;
bool lost_events, lost_events_warned;
float min_pcnt;
- struct perf_session_env *env;
+ struct perf_env *env;
};
static void perf_evsel_menu__write(struct ui_browser *browser,
@@ -2169,7 +2169,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
int nr_entries, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
struct perf_evsel *pos;
struct perf_evsel_menu menu = {
@@ -2202,7 +2202,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
int nr_entries = evlist->nr_entries;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 436e358..b39a619 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -56,6 +56,7 @@ struct perf_evlist {
struct cpu_map *cpus;
struct perf_evsel *selected;
struct events_stats stats;
+ struct perf_env *env;
};
struct perf_evsel_str_handler {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9db7c57..f67dab8 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2574,6 +2574,7 @@ int perf_session__read_header(struct perf_session *session)
if (session->evlist == NULL)
return -ENOMEM;
+ session->evlist->env = &header->env;
if (perf_data_file__is_pipe(file))
return perf_header__read_pipe(session);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 275ca49..80f8205 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -72,7 +72,7 @@ enum perf_header_cpu_attr {
PERF_HEADER_CPU_ATTR_MAX,
};
-struct perf_session_env {
+struct perf_env {
char *hostname;
char *os_release;
char *version;
@@ -111,7 +111,7 @@ struct perf_header {
u64 data_size;
u64 feat_offset;
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
- struct perf_session_env env;
+ struct perf_env env;
};
struct perf_evlist;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index bc528d5..de6d58e 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -313,7 +313,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env);
+ struct perf_env *env);
int script_browse(const char *script_opt);
#else
static inline
@@ -321,7 +321,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
const char *help __maybe_unused,
struct hist_browser_timer *hbt __maybe_unused,
float min_pcnt __maybe_unused,
- struct perf_session_env *env __maybe_unused)
+ struct perf_env *env __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 18722e7..8a4537e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -170,7 +170,7 @@ static void perf_session__delete_threads(struct perf_session *session)
machine__delete_threads(&session->machines.host);
}
-static void perf_session_env__exit(struct perf_session_env *env)
+static void perf_session_env__exit(struct perf_env *env)
{
zfree(&env->hostname);
zfree(&env->os_release);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 725640f..f14c06e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1843,7 +1843,7 @@ static void vmlinux_path__exit(void)
zfree(&vmlinux_path);
}
-static int vmlinux_path__init(struct perf_session_env *env)
+static int vmlinux_path__init(struct perf_env *env)
{
struct utsname uts;
char bf[PATH_MAX];
@@ -1954,7 +1954,7 @@ static bool symbol__read_kptr_restrict(void)
return value;
}
-int symbol__init(struct perf_session_env *env)
+int symbol__init(struct perf_env *env)
{
const char *symfs;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index a4cde92..440ba8a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -252,8 +252,8 @@ int modules__parse(const char *filename, void *arg,
int filename__read_debuglink(const char *filename, char *debuglink,
size_t size);
-struct perf_session_env;
-int symbol__init(struct perf_session_env *env);
+struct perf_env;
+int symbol__init(struct perf_env *env);
void symbol__exit(void);
void symbol__elf_init(void);
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
--
1.8.3.1
From: Arnaldo Carvalho de Melo <[email protected]>
So that functions that deal primarily with an evsel can access
information that concerns the whole evlist it is in.
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/evlist.c | 2 ++
tools/perf/util/evsel.c | 2 ++
tools/perf/util/evsel.h | 4 ++++
3 files changed, 8 insertions(+)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 373f65b..a8cc440 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -98,6 +98,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
evlist__for_each_safe(evlist, n, pos) {
list_del_init(&pos->node);
+ pos->evlist = NULL;
perf_evsel__delete(pos);
}
@@ -125,6 +126,7 @@ void perf_evlist__delete(struct perf_evlist *evlist)
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
+ entry->evlist = evlist;
list_add_tail(&entry->node, &evlist->entries);
entry->idx = evlist->nr_entries;
entry->tracking = !entry->idx;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index b096ef7..bac25f4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -206,6 +206,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->leader = evsel;
evsel->unit = "";
evsel->scale = 1.0;
+ evsel->evlist = NULL;
INIT_LIST_HEAD(&evsel->node);
INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
@@ -1026,6 +1027,7 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
+ assert(evsel->evlist == NULL);
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
perf_evsel__free_config_terms(evsel);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 93ac6b1..298e6bb 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -60,6 +60,9 @@ struct perf_evsel_config_term {
/** struct perf_evsel - event selector
*
+ * @evlist - evlist this evsel is in, if it is in one.
+ * @node - To insert it into evlist->entries or in other list_heads, say in
+ * the event parsing routines.
* @name - Can be set to retain the original event name passed by the user,
* so that when showing results in tools such as 'perf stat', we
* show the name used, not some alias.
@@ -73,6 +76,7 @@ struct perf_evsel_config_term {
*/
struct perf_evsel {
struct list_head node;
+ struct perf_evlist *evlist;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
--
1.8.3.1
From: Kan Liang <[email protected]>
The group read results from the cycles/ref-cycles/TSC/APERF/MPERF events
can be used to calculate the frequency, CPU utilization and percent
performance during each sampling period.
This patch shows them in report -D.
Here is an example:
$ perf record -e
'{cycles,ref-cycles,msr/tsc/,msr/mperf/,msr/aperf/}:S' ~/tchain_edit
Here is one sample from perf report -D
1972044565107 0x3498 [0x88]: PERF_RECORD_SAMPLE(IP, 0x2): 10608/10608:
0x4005fd period: 564686 addr: 0
... sample_read:
.... group nr 5
..... id 0000000000000012, value 0000000002143901
..... id 0000000000000052, value 0000000002143896
..... id 0000000000000094, value 00000000021e443d
..... id 00000000000000d4, value 00000000021db984
..... id 0000000000000114, value 00000000021db964
..... Freq 2301 MHz
..... CPU% 98%
..... CORE_BUSY% 99%
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/util/session.c | 31 ++++++++++++++++++++---
tools/perf/util/session.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 91 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8a4537e..c522b0a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -7,7 +7,6 @@
#include <sys/mman.h>
#include "evlist.h"
-#include "evsel.h"
#include "session.h"
#include "tool.h"
#include "sort.h"
@@ -877,8 +876,15 @@ static void perf_evlist__print_tstamp(struct perf_evlist *evlist,
printf("%" PRIu64 " ", sample->time);
}
-static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+static void sample_read__printf(struct perf_sample *sample,
+ struct perf_evsel *evsel)
{
+ u64 read_format = evsel->attr.read_format;
+ struct perf_evlist *evlist = evsel->evlist;
+ struct perf_sample_id *sid;
+ perf_freq_t data = { 0 };
+ u64 cpu_max_freq = evlist->env->cpu_attr[PERF_HEADER_CPU_MAX_FREQ];
+
printf("... sample_read:\n");
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -901,10 +907,26 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format)
printf("..... id %016" PRIx64
", value %016" PRIx64 "\n",
value->id, value->value);
+
+ sid = perf_evlist__id2sid(evlist, value->id);
+ evsel = sid->evsel;
+ if (evsel != NULL)
+ perf_freq__init(evlist->env->msr_pmu_type,
+ evsel, data, value->value);
}
} else
printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
sample->read.one.id, sample->read.one.value);
+
+ if (perf_freq__has_freq(data))
+ printf("..... Freq %lu MHz\n",
+ perf_freq__get_freq(data, cpu_max_freq/1000));
+ if (perf_freq__has_cpu_util(data))
+ printf("..... CPU%% %lu%%\n",
+ perf_freq__get_cpu_util(data));
+ if (perf_freq__has_core_busy(data))
+ printf("..... CORE_BUSY%% %lu%%\n",
+ perf_freq__get_core_busy(data));
}
static void dump_event(struct perf_evlist *evlist, union perf_event *event,
@@ -964,7 +986,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
printf("... transaction: %" PRIx64 "\n", sample->transaction);
if (sample_type & PERF_SAMPLE_READ)
- sample_read__printf(sample, evsel->attr.read_format);
+ sample_read__printf(sample, evsel);
}
static struct machine *machines__find_for_cpumode(struct machines *machines,
@@ -1079,11 +1101,12 @@ static int machines__deliver_event(struct machines *machines,
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
- dump_sample(evsel, event, sample);
if (evsel == NULL) {
++evlist->stats.nr_unknown_id;
return 0;
}
+ dump_sample(evsel, event, sample);
+
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
return 0;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index b44afc7..83bf4a9 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -11,6 +11,7 @@
#include "ordered-events.h"
#include <linux/rbtree.h>
#include <linux/perf_event.h>
+#include "evsel.h"
struct ip_callchain;
struct thread;
@@ -42,6 +43,69 @@ struct perf_session {
#define PRINT_IP_OPT_ONELINE (1<<4)
#define PRINT_IP_OPT_SRCLINE (1<<5)
+#define PERF_MSR_TSC 0 /* attr.config matched for the TSC slot when attr.type is the msr PMU type */
+#define PERF_MSR_APERF 1 /* attr.config matched for the APERF slot */
+#define PERF_MSR_MPERF 2 /* attr.config matched for the MPERF slot */
+
+enum perf_freq_perf_index { /* slot numbers inside a perf_freq_t array */
+ FREQ_PERF_TSC = 0,
+ FREQ_PERF_APERF = 1,
+ FREQ_PERF_MPERF = 2,
+ FREQ_PERF_CYCLES = 3,
+ FREQ_PERF_REF_CYCLES = 4,
+
+ FREQ_PERF_MAX /* number of slots */
+};
+
+typedef u64 perf_freq_t[FREQ_PERF_MAX]; /* one counter value per perf_freq_perf_index slot */
+
+/*
+ * Store @value in the @array slot that corresponds to @evsel's event.
+ *
+ * Recognized events are the hardware cycles/ref-cycles counters and the
+ * tsc/aperf/mperf configs of the PMU whose type is @msr_pmu_type.  Any
+ * other event leaves @array untouched.
+ */
+static inline void perf_freq__init(unsigned int msr_pmu_type,
+				   struct perf_evsel *evsel,
+				   perf_freq_t array,
+				   u64 value)
+{
+	const u64 config = evsel->attr.config;
+
+	/*
+	 * Test the hardware type first and keep the two branches exclusive.
+	 * PERF_TYPE_HARDWARE is 0, which is presumably also what
+	 * msr_pmu_type holds when the header had no "msr" PMU mapping
+	 * (assumes a zero-initialized env -- TODO confirm).  Without the
+	 * exclusion, cycles (config 0) would be stored as TSC too, and
+	 * hardware configs 1 and 2 as APERF and MPERF.
+	 */
+	if (evsel->attr.type == PERF_TYPE_HARDWARE) {
+		if (config == PERF_COUNT_HW_CPU_CYCLES)
+			array[FREQ_PERF_CYCLES] = value;
+		else if (config == PERF_COUNT_HW_REF_CPU_CYCLES)
+			array[FREQ_PERF_REF_CYCLES] = value;
+	} else if (evsel->attr.type == msr_pmu_type) {
+		if (config == PERF_MSR_TSC)
+			array[FREQ_PERF_TSC] = value;
+		else if (config == PERF_MSR_APERF)
+			array[FREQ_PERF_APERF] = value;
+		else if (config == PERF_MSR_MPERF)
+			array[FREQ_PERF_MPERF] = value;
+	}
+}
+
+/* True when both the cycles and the ref-cycles slots are non-zero. */
+static inline bool perf_freq__has_freq(perf_freq_t array)
+{
+	if (array[FREQ_PERF_CYCLES] == 0)
+		return false;
+
+	return array[FREQ_PERF_REF_CYCLES] != 0;
+}
+/*
+ * Scale @cpu_max_freq by the cycles / ref-cycles ratio.  Divides by the
+ * ref-cycles slot, so callers check perf_freq__has_freq() first.
+ */
+static inline u64 perf_freq__get_freq(perf_freq_t array, u64 cpu_max_freq)
+{
+	const u64 cycles = array[FREQ_PERF_CYCLES];
+	const u64 ref_cycles = array[FREQ_PERF_REF_CYCLES];
+
+	return cycles * cpu_max_freq / ref_cycles;
+}
+/* True when both the TSC and the ref-cycles slots are non-zero. */
+static inline bool perf_freq__has_cpu_util(perf_freq_t array)
+{
+	if (array[FREQ_PERF_TSC] == 0)
+		return false;
+
+	return array[FREQ_PERF_REF_CYCLES] != 0;
+}
+/*
+ * ref-cycles as an integer percentage of TSC.  Divides by the TSC slot, so
+ * callers check perf_freq__has_cpu_util() first.
+ */
+static inline u64 perf_freq__get_cpu_util(perf_freq_t array)
+{
+	const u64 ref_cycles = array[FREQ_PERF_REF_CYCLES];
+	const u64 tsc = array[FREQ_PERF_TSC];
+
+	return 100 * ref_cycles / tsc;
+}
+
+/* True when both the APERF and the MPERF slots are non-zero. */
+static inline bool perf_freq__has_core_busy(perf_freq_t array)
+{
+	if (array[FREQ_PERF_APERF] == 0)
+		return false;
+
+	return array[FREQ_PERF_MPERF] != 0;
+}
+/*
+ * APERF as an integer percentage of MPERF.  Divides by the MPERF slot, so
+ * callers check perf_freq__has_core_busy() first.
+ */
+static inline u64 perf_freq__get_core_busy(perf_freq_t array)
+{
+	const u64 aperf = array[FREQ_PERF_APERF];
+	const u64 mperf = array[FREQ_PERF_MPERF];
+
+	return 100 * aperf / mperf;
+}
+
struct perf_tool;
struct perf_session *perf_session__new(struct perf_data_file *file,
--
1.8.3.1
From: Kan Liang <[email protected]>
Calculate freq/CPU%/CORE_BUSY% in add_entry_cb, and update the value in
he_stat.
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/builtin-report.c | 36 ++++++++++++++++++++++++++++++++++++
tools/perf/util/sort.h | 3 +++
2 files changed, 39 insertions(+)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 62b285e..1bc74acd 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -89,6 +89,38 @@ static int report__config(const char *var, const char *value, void *cb)
return perf_default_config(var, value, cb);
}
+/*
+ * Derive freq / CPU% / CORE_BUSY% from the group read values of the sample
+ * behind @iter and store them in iter->he->stat.
+ *
+ * iter->evsel is treated as the group leader.  The following evsels in the
+ * session evlist that share this leader are paired, in order, with
+ * sample->read.group.values[1..].  Each stat field is only written when the
+ * events it needs were found with non-zero values.
+ */
+static void set_he_freq_perf(struct perf_session *session,
+			     struct hist_entry_iter *iter)
+{
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *leader = iter->evsel;
+	struct perf_evsel *evsel = leader;
+	struct perf_sample *sample = iter->sample;
+	struct perf_evlist *evlist = session->evlist;
+	u64 cpu_max_freq = session->header.env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ];
+	perf_freq_t data = { 0 };
+	u64 nr = 0;
+
+	/*
+	 * sample->read.group is only meaningful for samples recorded with
+	 * PERF_SAMPLE_READ and PERF_FORMAT_GROUP.  Without them, or with an
+	 * empty group, there are no values to index.
+	 */
+	if (!(leader->attr.sample_type & PERF_SAMPLE_READ) ||
+	    !(leader->attr.read_format & PERF_FORMAT_GROUP) ||
+	    sample->read.group.nr == 0)
+		return;
+
+	perf_freq__init(session->header.env.msr_pmu_type,
+			evsel, data,
+			sample->read.group.values[nr].value);
+	evlist__for_each_continue(evlist, evsel) {
+		/* Stop at the next group or when the values run out. */
+		if ((evsel->leader != leader) ||
+		    (++nr >= sample->read.group.nr))
+			break;
+		perf_freq__init(session->header.env.msr_pmu_type,
+				evsel, data,
+				sample->read.group.values[nr].value);
+	}
+
+	/* cpu_max_freq is divided by 1000 before scaling (KHz -> MHz). */
+	if (perf_freq__has_freq(data))
+		he->stat.freq = perf_freq__get_freq(data, cpu_max_freq/1000);
+	if (perf_freq__has_cpu_util(data))
+		he->stat.cpu_util = perf_freq__get_cpu_util(data);
+	if (perf_freq__has_core_busy(data))
+		he->stat.core_busy = perf_freq__get_core_busy(data);
+}
+
static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
@@ -100,6 +132,10 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct mem_info *mi;
struct branch_info *bi;
+ if ((iter->ops == &hist_iter_normal) &&
+ perf_evsel__is_group_leader(evsel))
+ set_he_freq_perf(rep->session, iter);
+
if (!ui__has_annotation())
return 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3c2a399..9ed52e6 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -54,6 +54,9 @@ struct he_stat {
u64 period_guest_us;
u64 weight;
u32 nr_events;
+ u64 freq;
+ u64 cpu_util;
+ u64 core_busy;
};
struct hist_entry_diff {
--
1.8.3.1
From: Kan Liang <[email protected]>
Show frequency, CPU utilization and percent performance for each symbol
in perf report with --stdio --show-freq-perf.
In a sampling group, only the group leader does the sampling, so only the
group leader's freq needs to be printed with --group.
Here is an example.
$ perf report --stdio --group --show-freq-perf
Overhead FREQ MHz CPU% CORE_BUSY%
Command Shared Object Symbol
........................................ ......... ..... ..........
........... ................ ......................
99.54% 99.54% 99.53% 99.53% 99.53% 2301 96 99
tchain_edit tchain_edit [.] f3
0.20% 0.20% 0.20% 0.20% 0.20% 2301 98 99
tchain_edit tchain_edit [.] f2
0.05% 0.05% 0.05% 0.05% 0.05% 2300 98 99
tchain_edit [kernel.vmlinux] [k] read_tsc
Signed-off-by: Kan Liang <[email protected]>
---
tools/perf/Documentation/perf-report.txt | 12 ++++++
tools/perf/builtin-report.c | 22 +++++++++-
tools/perf/ui/hist.c | 71 +++++++++++++++++++++++++++++---
tools/perf/util/hist.h | 3 ++
tools/perf/util/session.c | 33 ++++++++-------
tools/perf/util/sort.c | 3 ++
tools/perf/util/symbol.h | 12 +++++-
7 files changed, 134 insertions(+), 22 deletions(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index a18ba75..9f979a7 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -306,6 +306,18 @@ OPTIONS
special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
'perf mem' for simpler access.
+--show-freq-perf::
+ Show CPU frequency and performance result from sample read.
+ To generate the frequency and performance output, the perf.data file
+ must have been obtained by group read and using special events cycles,
+ ref-cycles, msr/tsc/, msr/aperf/ or msr/mperf/
+ Freq MHz: The frequency during the sample interval. Needs the cycles and
+ ref-cycles events.
+ CPU%: CPU utilization during the sample interval. Needs ref-cycles and
+ msr/tsc/ events.
+ CORE_BUSY%: actual percent performance (APERF/MPERF%) during the
+ sample interval. Needs msr/aperf/ and msr/mperf/ events.
+
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1bc74acd..a074e6a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -133,7 +133,8 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct branch_info *bi;
if ((iter->ops == &hist_iter_normal) &&
- perf_evsel__is_group_leader(evsel))
+ perf_evsel__is_group_leader(evsel) &&
+ symbol_conf.show_freq_perf)
set_he_freq_perf(rep->session, iter);
if (!ui__has_annotation())
@@ -772,6 +773,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+ OPT_BOOLEAN(0, "show-freq-perf", &symbol_conf.show_freq_perf,
+ "show CPU freqency and performance info"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
@@ -788,7 +791,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
+ struct perf_evsel *pos;
int ret = hists__init();
+ perf_freq_t freq_data_status = { 0 };
if (ret < 0)
return ret;
@@ -873,6 +878,21 @@ repeat:
symbol_conf.cumulate_callchain = false;
}
+
+ if (symbol_conf.show_freq_perf) {
+ symbol_conf.freq_perf_type = 0;
+ evlist__for_each(session->evlist, pos) {
+ perf_freq__init(session->header.env.msr_pmu_type,
+ pos, freq_data_status, 1);
+ }
+ if (perf_freq__has_freq(freq_data_status))
+ symbol_conf.freq_perf_type |= 1U << DISPLAY_FREQ;
+ if (perf_freq__has_cpu_util(freq_data_status))
+ symbol_conf.freq_perf_type |= 1U << DISPLAY_CPU_UTIL;
+ if (perf_freq__has_core_busy(freq_data_status))
+ symbol_conf.freq_perf_type |= 1U << DISPLAY_CORE_BUSY;
+ }
+
if (setup_sorting() < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 25d6083..c2be455 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -17,7 +17,7 @@
static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt, int len,
- hpp_snprint_fn print_fn, bool fmt_percent)
+ hpp_snprint_fn print_fn, bool fmt_percent, bool single)
{
int ret;
struct hists *hists = he->hists;
@@ -36,7 +36,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
} else
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
- if (perf_evsel__is_group_event(evsel)) {
+ if (perf_evsel__is_group_event(evsel) && !single) {
int prev_idx, idx_delta;
struct hist_entry *pair;
int nr_members = evsel->nr_members;
@@ -109,10 +109,16 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
{
int len = fmt->user_len ?: fmt->len;
+ bool single = false;
+
+ if (((fmt == &perf_hpp__format[PERF_HPP__FREQ]) ||
+ (fmt == &perf_hpp__format[PERF_HPP__CPU_UTIL]) ||
+ (fmt == &perf_hpp__format[PERF_HPP__CORE_BUSY])))
+ single = true;
if (symbol_conf.field_sep) {
return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
- print_fn, fmt_percent);
+ print_fn, fmt_percent, single);
}
if (fmt_percent)
@@ -120,7 +126,7 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
else
len -= 1;
- return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent);
+ return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent, single);
}
int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -234,6 +240,30 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
}
+static int hpp__single_width_fn(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel)
+{
+ int len = fmt->user_len ?: fmt->len;
+
+ if (symbol_conf.event_group && !symbol_conf.show_freq_perf)
+ len = max(len, evsel->nr_members * fmt->len);
+
+ if (len < (int)strlen(fmt->name))
+ len = strlen(fmt->name);
+
+ return len;
+}
+
+static int hpp__single_header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel)
+{
+ int len = hpp__single_width_fn(fmt, hpp, evsel);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
+}
+
+
static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
{
va_list args;
@@ -363,6 +393,9 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period)
HPP_RAW_FNS(samples, nr_events)
HPP_RAW_FNS(period, period)
+HPP_RAW_FNS(freq, freq)
+HPP_RAW_FNS(cpu_util, cpu_util)
+HPP_RAW_FNS(core_busy, core_busy)
static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *a __maybe_unused,
@@ -395,6 +428,17 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
.sort = hpp__sort_ ## _fn, \
}
+#define HPP__SINGLE_PRINT_FNS(_name, _fn) \
+ { \
+ .name = _name, \
+ .header = hpp__single_header_fn, \
+ .width = hpp__single_width_fn, \
+ .entry = hpp__entry_ ## _fn, \
+ .cmp = hpp__nop_cmp, \
+ .collapse = hpp__nop_cmp, \
+ .sort = hpp__sort_ ## _fn, \
+ }
+
#define HPP__PRINT_FNS(_name, _fn) \
{ \
.name = _name, \
@@ -414,7 +458,10 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
HPP__PRINT_FNS("Samples", samples),
- HPP__PRINT_FNS("Period", period)
+ HPP__PRINT_FNS("Period", period),
+ HPP__SINGLE_PRINT_FNS("FREQ MHz", freq),
+ HPP__SINGLE_PRINT_FNS("CPU%", cpu_util),
+ HPP__SINGLE_PRINT_FNS("CORE_BUSY%", core_busy)
};
LIST_HEAD(perf_hpp__list);
@@ -485,6 +532,15 @@ void perf_hpp__init(void)
if (symbol_conf.show_total_period)
perf_hpp__column_enable(PERF_HPP__PERIOD);
+ if (symbol_conf.show_freq_perf) {
+ if (symbol_conf.freq_perf_type & (1U << DISPLAY_FREQ))
+ perf_hpp__column_enable(PERF_HPP__FREQ);
+ if (symbol_conf.freq_perf_type & (1U << DISPLAY_CPU_UTIL))
+ perf_hpp__column_enable(PERF_HPP__CPU_UTIL);
+ if (symbol_conf.freq_perf_type & (1U << DISPLAY_CORE_BUSY))
+ perf_hpp__column_enable(PERF_HPP__CORE_BUSY);
+ }
+
/* prepend overhead field for backward compatiblity. */
list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
if (list_empty(list))
@@ -652,6 +708,9 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
return;
switch (idx) {
+ case PERF_HPP__CPU_UTIL:
+ fmt->len = 5;
+ break;
case PERF_HPP__OVERHEAD:
case PERF_HPP__OVERHEAD_SYS:
case PERF_HPP__OVERHEAD_US:
@@ -661,6 +720,8 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
case PERF_HPP__OVERHEAD_GUEST_SYS:
case PERF_HPP__OVERHEAD_GUEST_US:
+ case PERF_HPP__FREQ:
+ case PERF_HPP__CORE_BUSY:
fmt->len = 9;
break;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index de6d58e..df07f28 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -237,6 +237,9 @@ enum {
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
+ PERF_HPP__FREQ,
+ PERF_HPP__CPU_UTIL,
+ PERF_HPP__CORE_BUSY,
PERF_HPP__MAX_INDEX
};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c522b0a..632c4c7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -907,26 +907,29 @@ static void sample_read__printf(struct perf_sample *sample,
printf("..... id %016" PRIx64
", value %016" PRIx64 "\n",
value->id, value->value);
-
- sid = perf_evlist__id2sid(evlist, value->id);
- evsel = sid->evsel;
- if (evsel != NULL)
- perf_freq__init(evlist->env->msr_pmu_type,
- evsel, data, value->value);
+ if (symbol_conf.show_freq_perf) {
+ sid = perf_evlist__id2sid(evlist, value->id);
+ evsel = sid->evsel;
+ if (evsel != NULL)
+ perf_freq__init(evlist->env->msr_pmu_type,
+ evsel, data, value->value);
+ }
}
} else
printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
sample->read.one.id, sample->read.one.value);
- if (perf_freq__has_freq(data))
- printf("..... Freq %lu MHz\n",
- perf_freq__get_freq(data, cpu_max_freq/1000));
- if (perf_freq__has_cpu_util(data))
- printf("..... CPU%% %lu%%\n",
- perf_freq__get_cpu_util(data));
- if (perf_freq__has_core_busy(data))
- printf("..... CORE_BUSY%% %lu%%\n",
- perf_freq__get_core_busy(data));
+ if (symbol_conf.show_freq_perf) {
+ if (perf_freq__has_freq(data))
+ printf("..... Freq %lu MHz\n",
+ perf_freq__get_freq(data, cpu_max_freq/1000));
+ if (perf_freq__has_cpu_util(data))
+ printf("..... CPU%% %lu%%\n",
+ perf_freq__get_cpu_util(data));
+ if (perf_freq__has_core_busy(data))
+ printf("..... CORE_BUSY%% %lu%%\n",
+ perf_freq__get_core_busy(data));
+ }
}
static void dump_event(struct perf_evlist *evlist, union perf_event *event,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7e38716..8eb81db 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1303,6 +1303,9 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
+ DIM(PERF_HPP__FREQ, "freq"),
+ DIM(PERF_HPP__CPU_UTIL, "cpu_u"),
+ DIM(PERF_HPP__CORE_BUSY, "core_busy"),
};
#undef DIM
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 440ba8a..8b21fb4 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -80,6 +80,14 @@ static inline size_t symbol__size(const struct symbol *sym)
struct strlist;
struct intlist;
+enum freq_perf_type_index {
+ DISPLAY_FREQ = 0,
+ DISPLAY_CPU_UTIL,
+ DISPLAY_CORE_BUSY,
+
+ DISPLAY_MAX
+};
+
struct symbol_conf {
unsigned short priv_size;
unsigned short nr_events;
@@ -107,7 +115,8 @@ struct symbol_conf {
show_hist_headers,
branch_callstack,
has_filter,
- show_ref_callgraph;
+ show_ref_callgraph,
+ show_freq_perf;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
@@ -132,6 +141,7 @@ struct symbol_conf {
struct intlist *pid_list,
*tid_list;
const char *symfs;
+ u64 freq_perf_type;
};
extern struct symbol_conf symbol_conf;
--
1.8.3.1
On Tue, Aug 25, 2015 at 05:18:02AM -0400, Kan Liang wrote:
SNIP
>
> +static int process_cpu_attributes(struct perf_file_section *section __maybe_unused,
> + struct perf_header *ph, int fd,
> + void *data __maybe_unused)
> +{
> + ssize_t ret;
> + u32 tag_id;
> + u64 nr;
> +
> +
> + ret = readn(fd, &tag_id, sizeof(tag_id));
> + if (ret != sizeof(tag_id))
> + return -1;
> +
> + if (ph->needs_swap)
> + nr = bswap_32(tag_id);
> +
> + if (tag_id >= PERF_HEADER_CPU_ATTR_MAX) {
> + pr_debug("The number of cpu attributes is not expected. "
> + "You may need to upgrade the perf tool.\n");
> + return -1;
> + }
> +
> + ret = readn(fd, &nr, sizeof(nr));
> + if (ret != sizeof(nr))
> + return -1;
> +
> + if (ph->needs_swap)
> + nr = bswap_64(nr);
> +
> + ph->env.cpu_attr[tag_id] = nr;
This should be in a loop, right? I.e. process it PERF_HEADER_CPU_ATTR_MAX times.
I understand it's enough for now when there's only a single attr,
but could you please put it into a loop, so the next time
we add a cpu attribute we don't need to add this logic?
thanks,
jirka
On Tue, Aug 25, 2015 at 05:18:02AM -0400, Kan Liang wrote:
SNIP
> @@ -66,6 +67,11 @@ struct perf_header;
> int perf_file_header__read(struct perf_file_header *header,
> struct perf_header *ph, int fd);
>
> +enum perf_header_cpu_attr {
> + PERF_HEADER_CPU_MAX_FREQ = 0,
> + PERF_HEADER_CPU_ATTR_MAX,
> +};
> +
> struct perf_session_env {
> char *hostname;
> char *os_release;
> @@ -89,6 +95,12 @@ struct perf_session_env {
> char *sibling_threads;
> char *numa_nodes;
> char *pmu_mappings;
> + union {
> + u64 cpu_attr[PERF_HEADER_CPU_ATTR_MAX];
> + struct {
> + u64 freq;
should it be maxfreq?
jirka
On Tue, Aug 25, 2015 at 05:18:07AM -0400, Kan Liang wrote:
> From: Kan Liang <[email protected]>
>
> Calculate freq/CPU%/CORE_BUSY% in add_entry_cb, and update the value in
> he_stat.
>
> Signed-off-by: Kan Liang <[email protected]>
> ---
> tools/perf/builtin-report.c | 36 ++++++++++++++++++++++++++++++++++++
> tools/perf/util/sort.h | 3 +++
> 2 files changed, 39 insertions(+)
>
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 62b285e..1bc74acd 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -89,6 +89,38 @@ static int report__config(const char *var, const char *value, void *cb)
> return perf_default_config(var, value, cb);
> }
>
> +static void set_he_freq_perf(struct perf_session *session,
> + struct hist_entry_iter *iter)
> +{
> + struct hist_entry *he = iter->he;
> + struct perf_evsel *evsel = iter->evsel;
> + struct perf_evsel *leader = evsel;
> + struct perf_sample *sample = iter->sample;
> + struct perf_evlist *evlist = session->evlist;
> + u64 cpu_max_freq = session->header.env.cpu_attr[PERF_HEADER_CPU_MAX_FREQ];
so why not access the cpu_attr.freq ?
jirka