2021-10-29 22:55:54

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

The current logic for the perf missing feature has a bug that it can
wrongly clear some modifiers like G or H. Actually some PMUs don't
support any filtering or exclusion while others do. But we check it
as a global feature.

For example, the cycles event can have 'G' modifier to enable it only
in the guest mode on x86. When you don't run any VMs it'll return 0.

# perf stat -a -e cycles:G sleep 1

Performance counter stats for 'system wide':

0 cycles:G

1.000721670 seconds time elapsed

But when it's used with other pmu events that don't support G modifier,
it'll be reset and return non-zero values.

# perf stat -a -e cycles:G,msr/tsc/ sleep 1

Performance counter stats for 'system wide':

538,029,960 cycles:G
16,924,010,738 msr/tsc/

1.001815327 seconds time elapsed

This is because of the missing feature detection logic being global.
Add a hashmap to set pmu-specific exclude_host/guest features.

Reported-by: Stephane Eranian <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
v3 changes)
* check memory allocation failure
* add more NULL check

v2 changes)
* change to enum perf_missing_pmu_features
* pass NULL to hashmap__find() to skip checking
* add a blank line after declaration

tools/perf/util/evsel.c | 54 ++++++++++++++++++++++++++++++++++++-----
tools/perf/util/evsel.h | 7 ++++++
2 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbfeceb2546c..d3ff4809627b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
{
evsel__exit(evsel);
free(evsel);
+
+ /* just free it for the first evsel */
+ hashmap__free(perf_missing_features.pmu);
+ perf_missing_features.pmu = NULL;
}

void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
@@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
return 0;
}

+#define PMU_HASH_BITS 4
+
+static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
+{
+ const struct evsel *evsel = key;
+
+ return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
+}
+
+static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
+{
+ const struct evsel *a = key1;
+ const struct evsel *b = key2;
+
+ return a->core.attr.type == b->core.attr.type;
+}
+
static void evsel__disable_missing_features(struct evsel *evsel)
{
if (perf_missing_features.weight_struct) {
@@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
if (perf_missing_features.mmap2)
evsel->core.attr.mmap2 = 0;
- if (perf_missing_features.exclude_guest)
- evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
+ if (perf_missing_features.exclude_guest) {
+ /* we only have EXCLUDE_GUEST bit, let's skip checking */
+ if (perf_missing_features.pmu != NULL &&
+ hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
+ evsel->core.attr.exclude_guest = 0;
+ evsel->core.attr.exclude_host = 0;
+ }
+ }
if (perf_missing_features.lbr_flags)
evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES);
@@ -1840,6 +1867,14 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,

bool evsel__detect_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.pmu == NULL) {
+ perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
+ if (IS_ERR(perf_missing_features.pmu)) {
+ pr_err("Memory allocation failure!\n");
+ perf_missing_features.pmu = NULL;
+ }
+ }
+
/*
* Must probe features in the order they were added to the
* perf_event_attr interface.
@@ -1900,10 +1935,17 @@ bool evsel__detect_missing_features(struct evsel *evsel)
perf_missing_features.mmap2 = true;
pr_debug2_peo("switching off mmap2\n");
return true;
- } else if (!perf_missing_features.exclude_guest &&
- (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
- perf_missing_features.exclude_guest = true;
- pr_debug2_peo("switching off exclude_guest, exclude_host\n");
+ } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
+ perf_missing_features.pmu != NULL &&
+ !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
+ uintptr_t pmu_features = PERF_MISSING_PMU_EXCLUDE_GUEST;
+
+ hashmap__add(perf_missing_features.pmu, evsel, (void *)pmu_features);
+
+ if (!perf_missing_features.exclude_guest) {
+ perf_missing_features.exclude_guest = true;
+ pr_debug2_peo("switching off exclude_guest, exclude_host\n");
+ }
return true;
} else if (!perf_missing_features.sample_id_all) {
perf_missing_features.sample_id_all = true;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1f7edfa8568a..11b5ece19f0e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -172,6 +172,13 @@ struct perf_missing_features {
bool data_page_size;
bool code_page_size;
bool weight_struct;
+
+ /* contains enum perf_missing_pmu_features below */
+ struct hashmap *pmu;
+};
+
+enum perf_missing_pmu_features {
+ PERF_MISSING_PMU_EXCLUDE_GUEST = 1UL << 0,
};

extern struct perf_missing_features perf_missing_features;
--
2.33.1.1089.g2158813163f-goog


2021-10-31 11:29:45

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim wrote:
> The current logic for the perf missing feature has a bug that it can
> wrongly clear some modifiers like G or H. Actually some PMUs don't
> support any filtering or exclusion while others do. But we check it
> as a global feature.
>
> For example, the cycles event can have 'G' modifier to enable it only
> in the guest mode on x86. When you don't run any VMs it'll return 0.
>
> # perf stat -a -e cycles:G sleep 1
>
> Performance counter stats for 'system wide':
>
> 0 cycles:G
>
> 1.000721670 seconds time elapsed
>
> But when it's used with other pmu events that don't support G modifier,
> it'll be reset and return non-zero values.
>
> # perf stat -a -e cycles:G,msr/tsc/ sleep 1
>
> Performance counter stats for 'system wide':
>
> 538,029,960 cycles:G
> 16,924,010,738 msr/tsc/
>
> 1.001815327 seconds time elapsed
>
> This is because of the missing feature detection logic being global.
> Add a hashmap to set pmu-specific exclude_host/guest features.
>
> Reported-by: Stephane Eranian <[email protected]>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> v3 changes)
> * check memory allocation failure
> * add more NULL check

Acked-by: Jiri Olsa <[email protected]>

thanks,
jirka

>
> v2 changes)
> * change to enum perf_missing_pmu_features
> * pass NULL to hashmap__find() to skip checking
> * add a blank line after declaration
>
> tools/perf/util/evsel.c | 54 ++++++++++++++++++++++++++++++++++++-----
> tools/perf/util/evsel.h | 7 ++++++
> 2 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..d3ff4809627b 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> {
> evsel__exit(evsel);
> free(evsel);
> +
> + /* just free it for the first evsel */
> + hashmap__free(perf_missing_features.pmu);
> + perf_missing_features.pmu = NULL;
> }
>
> void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> return 0;
> }
>
> +#define PMU_HASH_BITS 4
> +
> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> +{
> + const struct evsel *evsel = key;
> +
> + return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> +}
> +
> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> +{
> + const struct evsel *a = key1;
> + const struct evsel *b = key2;
> +
> + return a->core.attr.type == b->core.attr.type;
> +}
> +
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> if (perf_missing_features.weight_struct) {
> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> if (perf_missing_features.mmap2)
> evsel->core.attr.mmap2 = 0;
> - if (perf_missing_features.exclude_guest)
> - evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> + if (perf_missing_features.exclude_guest) {
> + /* we only have EXCLUDE_GUEST bit, let's skip checking */
> + if (perf_missing_features.pmu != NULL &&
> + hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + evsel->core.attr.exclude_guest = 0;
> + evsel->core.attr.exclude_host = 0;
> + }
> + }
> if (perf_missing_features.lbr_flags)
> evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
> PERF_SAMPLE_BRANCH_NO_CYCLES);
> @@ -1840,6 +1867,14 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> bool evsel__detect_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.pmu == NULL) {
> + perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> + if (IS_ERR(perf_missing_features.pmu)) {
> + pr_err("Memory allocation failure!\n");
> + perf_missing_features.pmu = NULL;
> + }
> + }
> +
> /*
> * Must probe features in the order they were added to the
> * perf_event_attr interface.
> @@ -1900,10 +1935,17 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> perf_missing_features.mmap2 = true;
> pr_debug2_peo("switching off mmap2\n");
> return true;
> - } else if (!perf_missing_features.exclude_guest &&
> - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> - perf_missing_features.exclude_guest = true;
> - pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> + perf_missing_features.pmu != NULL &&
> + !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + uintptr_t pmu_features = PERF_MISSING_PMU_EXCLUDE_GUEST;
> +
> + hashmap__add(perf_missing_features.pmu, evsel, (void *)pmu_features);
> +
> + if (!perf_missing_features.exclude_guest) {
> + perf_missing_features.exclude_guest = true;
> + pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + }
> return true;
> } else if (!perf_missing_features.sample_id_all) {
> perf_missing_features.sample_id_all = true;
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..11b5ece19f0e 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -172,6 +172,13 @@ struct perf_missing_features {
> bool data_page_size;
> bool code_page_size;
> bool weight_struct;
> +
> + /* contains enum perf_missing_pmu_features below */
> + struct hashmap *pmu;
> +};
> +
> +enum perf_missing_pmu_features {
> + PERF_MISSING_PMU_EXCLUDE_GUEST = 1UL << 0,
> };
>
> extern struct perf_missing_features perf_missing_features;
> --
> 2.33.1.1089.g2158813163f-goog
>

2021-11-01 21:11:00

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Em Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim escreveu:
> The current logic for the perf missing feature has a bug that it can
> wrongly clear some modifiers like G or H. Actually some PMUs don't
> support any filtering or exclusion while others do. But we check it
> as a global feature.
>
> For example, the cycles event can have 'G' modifier to enable it only
> in the guest mode on x86. When you don't run any VMs it'll return 0.
>
> # perf stat -a -e cycles:G sleep 1
>
> Performance counter stats for 'system wide':
>
> 0 cycles:G
>
> 1.000721670 seconds time elapsed
>
> But when it's used with other pmu events that don't support G modifier,
> it'll be reset and return non-zero values.
>
> # perf stat -a -e cycles:G,msr/tsc/ sleep 1
>
> Performance counter stats for 'system wide':
>
> 538,029,960 cycles:G
> 16,924,010,738 msr/tsc/
>
> 1.001815327 seconds time elapsed
>
> This is because of the missing feature detection logic being global.
> Add a hashmap to set pmu-specific exclude_host/guest features.
>
> Reported-by: Stephane Eranian <[email protected]>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> v3 changes)
> * check memory allocation failure
> * add more NULL check
>
> v2 changes)
> * change to enum perf_missing_pmu_features
> * pass NULL to hashmap__find() to skip checking
> * add a blank line after declaration
>
> tools/perf/util/evsel.c | 54 ++++++++++++++++++++++++++++++++++++-----
> tools/perf/util/evsel.h | 7 ++++++
> 2 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..d3ff4809627b 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> {
> evsel__exit(evsel);
> free(evsel);
> +
> + /* just free it for the first evsel */
> + hashmap__free(perf_missing_features.pmu);
> + perf_missing_features.pmu = NULL;
> }
>
> void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> return 0;
> }
>
> +#define PMU_HASH_BITS 4
> +
> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> +{
> + const struct evsel *evsel = key;
> +
> + return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> +}
> +
> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> +{
> + const struct evsel *a = key1;
> + const struct evsel *b = key2;
> +
> + return a->core.attr.type == b->core.attr.type;
> +}
> +
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> if (perf_missing_features.weight_struct) {
> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> if (perf_missing_features.mmap2)
> evsel->core.attr.mmap2 = 0;
> - if (perf_missing_features.exclude_guest)
> - evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> + if (perf_missing_features.exclude_guest) {
> + /* we only have EXCLUDE_GUEST bit, let's skip checking */
> + if (perf_missing_features.pmu != NULL &&
> + hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + evsel->core.attr.exclude_guest = 0;
> + evsel->core.attr.exclude_host = 0;
> + }
> + }
> if (perf_missing_features.lbr_flags)
> evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
> PERF_SAMPLE_BRANCH_NO_CYCLES);
> @@ -1840,6 +1867,14 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> bool evsel__detect_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.pmu == NULL) {
> + perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> + if (IS_ERR(perf_missing_features.pmu)) {
> + pr_err("Memory allocation failure!\n");
> + perf_missing_features.pmu = NULL;
> + }
> + }
> +
> /*
> * Must probe features in the order they were added to the
> * perf_event_attr interface.
> @@ -1900,10 +1935,17 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> perf_missing_features.mmap2 = true;
> pr_debug2_peo("switching off mmap2\n");
> return true;
> - } else if (!perf_missing_features.exclude_guest &&
> - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> - perf_missing_features.exclude_guest = true;
> - pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> + perf_missing_features.pmu != NULL &&
> + !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + uintptr_t pmu_features = PERF_MISSING_PMU_EXCLUDE_GUEST;
> +
> + hashmap__add(perf_missing_features.pmu, evsel, (void *)pmu_features);

Can't hashmap__add() fail?

> +
> + if (!perf_missing_features.exclude_guest) {
> + perf_missing_features.exclude_guest = true;
> + pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + }
> return true;
> } else if (!perf_missing_features.sample_id_all) {
> perf_missing_features.sample_id_all = true;
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..11b5ece19f0e 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -172,6 +172,13 @@ struct perf_missing_features {
> bool data_page_size;
> bool code_page_size;
> bool weight_struct;
> +
> + /* contains enum perf_missing_pmu_features below */
> + struct hashmap *pmu;
> +};
> +
> +enum perf_missing_pmu_features {
> + PERF_MISSING_PMU_EXCLUDE_GUEST = 1UL << 0,
> };
>
> extern struct perf_missing_features perf_missing_features;
> --
> 2.33.1.1089.g2158813163f-goog

--

- Arnaldo

2021-11-02 14:15:20

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim wrote:
> The current logic for the perf missing feature has a bug that it can
> wrongly clear some modifiers like G or H. Actually some PMUs don't
> support any filtering or exclusion while others do. But we check it
> as a global feature.
>
> For example, the cycles event can have 'G' modifier to enable it only
> in the guest mode on x86. When you don't run any VMs it'll return 0.
>
> # perf stat -a -e cycles:G sleep 1
>
> Performance counter stats for 'system wide':
>
> 0 cycles:G
>
> 1.000721670 seconds time elapsed
>
> But when it's used with other pmu events that don't support G modifier,
> it'll be reset and return non-zero values.
>
> # perf stat -a -e cycles:G,msr/tsc/ sleep 1
>
> Performance counter stats for 'system wide':
>
> 538,029,960 cycles:G
> 16,924,010,738 msr/tsc/
>
> 1.001815327 seconds time elapsed
>
> This is because of the missing feature detection logic being global.
> Add a hashmap to set pmu-specific exclude_host/guest features.
>
> Reported-by: Stephane Eranian <[email protected]>
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> v3 changes)
> * check memory allocation failure
> * add more NULL check

we were discussing this with Arnaldo yesterday and he had an idea to use
evsel->pmu link to store this info instead of hash.. I first thought we
needed 'evsel' related data, but after I gave it some thought I think that
might actually work

my argument was following usecase:

cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/

that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
but then I realized it's detection if pmu support :G and so if the :G is
not there, none of the events should have it

thoughts?

thanks,
jirka


>
> v2 changes)
> * change to enum perf_missing_pmu_features
> * pass NULL to hashmap__find() to skip checking
> * add a blank line after declaration
>
> tools/perf/util/evsel.c | 54 ++++++++++++++++++++++++++++++++++++-----
> tools/perf/util/evsel.h | 7 ++++++
> 2 files changed, 55 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..d3ff4809627b 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> {
> evsel__exit(evsel);
> free(evsel);
> +
> + /* just free it for the first evsel */
> + hashmap__free(perf_missing_features.pmu);
> + perf_missing_features.pmu = NULL;
> }
>
> void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> return 0;
> }
>
> +#define PMU_HASH_BITS 4
> +
> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> +{
> + const struct evsel *evsel = key;
> +
> + return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> +}
> +
> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> +{
> + const struct evsel *a = key1;
> + const struct evsel *b = key2;
> +
> + return a->core.attr.type == b->core.attr.type;
> +}
> +
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> if (perf_missing_features.weight_struct) {
> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> if (perf_missing_features.mmap2)
> evsel->core.attr.mmap2 = 0;
> - if (perf_missing_features.exclude_guest)
> - evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> + if (perf_missing_features.exclude_guest) {
> + /* we only have EXCLUDE_GUEST bit, let's skip checking */
> + if (perf_missing_features.pmu != NULL &&
> + hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + evsel->core.attr.exclude_guest = 0;
> + evsel->core.attr.exclude_host = 0;
> + }
> + }
> if (perf_missing_features.lbr_flags)
> evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
> PERF_SAMPLE_BRANCH_NO_CYCLES);
> @@ -1840,6 +1867,14 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> bool evsel__detect_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.pmu == NULL) {
> + perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> + if (IS_ERR(perf_missing_features.pmu)) {
> + pr_err("Memory allocation failure!\n");
> + perf_missing_features.pmu = NULL;
> + }
> + }
> +
> /*
> * Must probe features in the order they were added to the
> * perf_event_attr interface.
> @@ -1900,10 +1935,17 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> perf_missing_features.mmap2 = true;
> pr_debug2_peo("switching off mmap2\n");
> return true;
> - } else if (!perf_missing_features.exclude_guest &&
> - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> - perf_missing_features.exclude_guest = true;
> - pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> + perf_missing_features.pmu != NULL &&
> + !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> + uintptr_t pmu_features = PERF_MISSING_PMU_EXCLUDE_GUEST;
> +
> + hashmap__add(perf_missing_features.pmu, evsel, (void *)pmu_features);
> +
> + if (!perf_missing_features.exclude_guest) {
> + perf_missing_features.exclude_guest = true;
> + pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> + }
> return true;
> } else if (!perf_missing_features.sample_id_all) {
> perf_missing_features.sample_id_all = true;
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..11b5ece19f0e 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -172,6 +172,13 @@ struct perf_missing_features {
> bool data_page_size;
> bool code_page_size;
> bool weight_struct;
> +
> + /* contains enum perf_missing_pmu_features below */
> + struct hashmap *pmu;
> +};
> +
> +enum perf_missing_pmu_features {
> + PERF_MISSING_PMU_EXCLUDE_GUEST = 1UL << 0,
> };
>
> extern struct perf_missing_features perf_missing_features;
> --
> 2.33.1.1089.g2158813163f-goog
>

2021-11-02 23:10:01

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Hi Arnaldo,

On Mon, Nov 1, 2021 at 2:09 PM Arnaldo Carvalho de Melo <[email protected]> wrote:
>
> Em Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim escreveu:
> > @@ -1900,10 +1935,17 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> > perf_missing_features.mmap2 = true;
> > pr_debug2_peo("switching off mmap2\n");
> > return true;
> > - } else if (!perf_missing_features.exclude_guest &&
> > - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> > - perf_missing_features.exclude_guest = true;
> > - pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> > + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> > + perf_missing_features.pmu != NULL &&
> > + !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> > + uintptr_t pmu_features = PERF_MISSING_PMU_EXCLUDE_GUEST;
> > +
> > + hashmap__add(perf_missing_features.pmu, evsel, (void *)pmu_features);
>
> Can't hashmap__add() fail?

It can. I will add a check.

Thanks,
Namhyung

2021-11-02 23:23:40

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Hi Jiri,

On Tue, Nov 2, 2021 at 7:10 AM Jiri Olsa <[email protected]> wrote:
>
> On Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim wrote:
> > The current logic for the perf missing feature has a bug that it can
> > wrongly clear some modifiers like G or H. Actually some PMUs don't
> > support any filtering or exclusion while others do. But we check it
> > as a global feature.
> >
> > For example, the cycles event can have 'G' modifier to enable it only
> > in the guest mode on x86. When you don't run any VMs it'll return 0.
> >
> > # perf stat -a -e cycles:G sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> > 0 cycles:G
> >
> > 1.000721670 seconds time elapsed
> >
> > But when it's used with other pmu events that don't support G modifier,
> > it'll be reset and return non-zero values.
> >
> > # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> > 538,029,960 cycles:G
> > 16,924,010,738 msr/tsc/
> >
> > 1.001815327 seconds time elapsed
> >
> > This is because of the missing feature detection logic being global.
> > Add a hashmap to set pmu-specific exclude_host/guest features.
> >
> > Reported-by: Stephane Eranian <[email protected]>
> > Signed-off-by: Namhyung Kim <[email protected]>
> > ---
> > v3 changes)
> > * check memory allocation failure
> > * add more NULL check
>
> we were discussing this with Arnaldo yesterday and he had an idea to use
> evsel->pmu link to store this info instead of hash.. I first thought we
> needed 'evsel' related data, but after I gave it some thought I think that
> might actually work

I don't get it.. do we have evsel->pmu already? Or do you want to add it?
Yeah, the filtering facility (attr.exclude_*) should be kept in a PMU data
not in the evsel. So I added a hashmap to find the pmu data from attr.type.
How do I use evsel->pmu to store the info then?

>
> my argument was following usecase:
>
> cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/
>
> that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
> but then I realized it's detection if pmu support :G and so if the :G is
> not there, none of the events should have it
>
> thoughts?

I don't think I'm following well... ;-p

If the pmu doesn't support host/guest filtering, pmu/bla1/G
may count something. Not sure if it's better to error out.
But the cycles:G and instructions:G should result in 0
in case there's no VM running.

Thanks,
Namhyung

2021-11-03 07:26:40

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Tue, Nov 02, 2021 at 04:21:21PM -0700, Namhyung Kim wrote:
> Hi Jiri,
>
> On Tue, Nov 2, 2021 at 7:10 AM Jiri Olsa <[email protected]> wrote:
> >
> > On Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim wrote:
> > > The current logic for the perf missing feature has a bug that it can
> > > wrongly clear some modifiers like G or H. Actually some PMUs don't
> > > support any filtering or exclusion while others do. But we check it
> > > as a global feature.
> > >
> > > For example, the cycles event can have 'G' modifier to enable it only
> > > in the guest mode on x86. When you don't run any VMs it'll return 0.
> > >
> > > # perf stat -a -e cycles:G sleep 1
> > >
> > > Performance counter stats for 'system wide':
> > >
> > > 0 cycles:G
> > >
> > > 1.000721670 seconds time elapsed
> > >
> > > But when it's used with other pmu events that don't support G modifier,
> > > it'll be reset and return non-zero values.
> > >
> > > # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> > >
> > > Performance counter stats for 'system wide':
> > >
> > > 538,029,960 cycles:G
> > > 16,924,010,738 msr/tsc/
> > >
> > > 1.001815327 seconds time elapsed
> > >
> > > This is because of the missing feature detection logic being global.
> > > Add a hashmap to set pmu-specific exclude_host/guest features.
> > >
> > > Reported-by: Stephane Eranian <[email protected]>
> > > Signed-off-by: Namhyung Kim <[email protected]>
> > > ---
> > > v3 changes)
> > > * check memory allocation failure
> > > * add more NULL check
> >
> > we were discussing this with Arnaldo yesterday and he had an idea to use
> > evsel->pmu link to store this info instead of hash.. I first thought we
> > needed 'evsel' related data, but after I gave it some thought I think that
> > might actually work
>
> I don't get it.. do we have evsel->pmu already? Or do you want to add it?
> Yeah, the filtering facility (attr.exclude_*) should be kept in a PMU data
> not in the evsel. So I added a hashmap to find the pmu data from attr.type.
> How do I use evsel->pmu to store the info then?

evsel->pmu is not there yet (only evsel->pmu_name) so that
would need to be added.. we have evsel__find_pmu available

then the idea is to use evsel->pmu instead of the hasmap,
like add:

struct pmu {
...
bool missing_exclude_guest;
};

set it when the guest filtering fails and and check it
instead of the hashmap__find call

>
> >
> > my argument was following usecase:
> >
> > cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/
> >
> > that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
> > but then I realized it's detection if pmu support :G and so if the :G is
> > not there, none of the events should have it
> >
> > thoughts?
>
> I don't think I'm following well... ;-p
>
> If the pmu doesn't support host/guest filtering, pmu/bla1/G
> may count something. Not sure if it's better to error out.
> But the cycles:G and instructions:G should result in 0
> in case there's no VM running.

hm, I think if pmu doesn't support host/guest filtering then
I think 'pmu/bla1/G' should error, no? better no number than
bad number

jitka

>
> Thanks,
> Namhyung
>

2021-11-03 07:26:51

by Ravi Bangoria

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

> The current logic for the perf missing feature has a bug that it can
> wrongly clear some modifiers like G or H. Actually some PMUs don't
> support any filtering or exclusion while others do. But we check it
> as a global feature.

(Sorry to pitch in bit late)

AMD has one more problem on a similar line. On AMD, non-precise and
precise sampling are provided by core and IBS pmu respectively. Plus,
core pmu has filtering capability but IBS does not. Perf by default
sets precise_ip=3 and exclude_guest=1 and goes on decreasing precise_ip
with exclude_guest set until perf_event_open() succeeds. This is
causing perf to always fallback to core pmu (non-precise mode) even if
it's perfectly feasible to do precise sampling. Do you guys think this
problem should also be addressed while designing solution for Namhyung's
patch or solve it seperately like below patch:

---><---

From 48808299679199c39ff737a30a7f387669314fd7 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <[email protected]>
Date: Tue, 2 Nov 2021 11:01:12 +0530
Subject: [PATCH] perf/amd/ibs: Don't set exclude_guest by default

Perf tool sets exclude_guest by default while calling perf_event_open().
Because IBS does not have filtering capability, it always gets rejected
by IBS PMU driver and thus perf falls back to non-precise sampling. Fix
it by not setting exclude_guest by default on AMD.

Before:
$ sudo ./perf record -C 0 -vvv true |& grep precise
precise_ip 3
decreasing precise_ip by one (2)
precise_ip 2
decreasing precise_ip by one (1)
precise_ip 1
decreasing precise_ip by one (0)

After:
$ sudo ./perf record -C 0 -vvv true |& grep precise
precise_ip 3
decreasing precise_ip by one (2)
precise_ip 2

Reported-by: Kim Phillips <[email protected]>
Signed-off-by: Ravi Bangoria <[email protected]>
---
tools/perf/arch/x86/util/evsel.c | 23 +++++++++++++++++++++++
tools/perf/util/evsel.c | 12 +++++++-----
tools/perf/util/evsel.h | 1 +
3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 2f733cdc8dbb..7841a49ce856 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -1,8 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
+#include <stdlib.h>
#include "util/evsel.h"
+#include "util/env.h"
+#include "linux/string.h"

void arch_evsel__set_sample_weight(struct evsel *evsel)
{
evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
}
+
+void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
+{
+ struct perf_env env = {0};
+
+ if (!perf_env__cpuid(&env))
+ return;
+
+ /*
+ * On AMD, precise cycles event sampling internally uses IBS pmu.
+ * But IBS does not have filtering capabilities and perf by default
+ * sets exclude_guest = 1. This makes IBS pmu event init fail and
+ * thus perf ends up doing non-precise sampling. Avoid it by clearing
+ * exclude_guest.
+ */
+ if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
+ attr->exclude_guest = 0;
+
+ free(env.cpuid);
+}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbfeceb2546c..0b4267d4bb38 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -294,7 +294,7 @@ static bool perf_event_can_profile_kernel(void)
return perf_event_paranoid_check(1);
}

-struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)
+struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config)
{
struct perf_event_attr attr = {
.type = type,
@@ -305,18 +305,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)

event_attr_init(&attr);

- if (!precise)
- goto new_event;
-
/*
* Now let the usual logic to set up the perf_event_attr defaults
* to kick in when we return and before perf_evsel__open() is called.
*/
-new_event:
evsel = evsel__new(&attr);
if (evsel == NULL)
goto out;

+ arch_evsel__fixup_new_cycles(&evsel->core.attr);
+
evsel->precise_max = true;

/* use asprintf() because free(evsel) assumes name is allocated */
@@ -1047,6 +1045,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
evsel__set_sample_bit(evsel, WEIGHT);
}

+void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused)
+{
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1f7edfa8568a..764e54c6a23d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -277,6 +277,7 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);

void arch_evsel__set_sample_weight(struct evsel *evsel);
+void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);

int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
--
2.27.0

2021-11-03 07:46:40

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
>
> On Tue, Nov 02, 2021 at 04:21:21PM -0700, Namhyung Kim wrote:
> > Hi Jiri,
> >
> > On Tue, Nov 2, 2021 at 7:10 AM Jiri Olsa <[email protected]> wrote:
> > >
> > > On Fri, Oct 29, 2021 at 03:49:29PM -0700, Namhyung Kim wrote:
> > > > The current logic for the perf missing feature has a bug that it can
> > > > wrongly clear some modifiers like G or H. Actually some PMUs don't
> > > > support any filtering or exclusion while others do. But we check it
> > > > as a global feature.
> > > >
> > > > For example, the cycles event can have 'G' modifier to enable it only
> > > > in the guest mode on x86. When you don't run any VMs it'll return 0.
> > > >
> > > > # perf stat -a -e cycles:G sleep 1
> > > >
> > > > Performance counter stats for 'system wide':
> > > >
> > > > 0 cycles:G
> > > >
> > > > 1.000721670 seconds time elapsed
> > > >
> > > > But when it's used with other pmu events that don't support G modifier,
> > > > it'll be reset and return non-zero values.
> > > >
> > > > # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> > > >
> > > > Performance counter stats for 'system wide':
> > > >
> > > > 538,029,960 cycles:G
> > > > 16,924,010,738 msr/tsc/
> > > >
> > > > 1.001815327 seconds time elapsed
> > > >
> > > > This is because of the missing feature detection logic being global.
> > > > Add a hashmap to set pmu-specific exclude_host/guest features.
> > > >
> > > > Reported-by: Stephane Eranian <[email protected]>
> > > > Signed-off-by: Namhyung Kim <[email protected]>
> > > > ---
> > > > v3 changes)
> > > > * check memory allocation failure
> > > > * add more NULL check
> > >
> > > we were discussing this with Arnaldo yesterday and he had an idea to use
> > > evsel->pmu link to store this info instead of hash.. I first thought we
> > > needed 'evsel' related data, but after I gave it some thought I think that
> > > might actually work
> >
> > I don't get it.. do we have evsel->pmu already? Or do you want to add it?
> > Yeah, the filtering facility (attr.exclude_*) should be kept in a PMU data
> > not in the evsel. So I added a hashmap to find the pmu data from attr.type.
> > How do I use evsel->pmu to store the info then?
>
> evsel->pmu is not there yet (only evsel->pmu_name) so that
> would need to be added.. we have evsel__find_pmu available
>
> then the idea is to use evsel->pmu instead of the hasmap,
> like add:
>
> struct pmu {
> ...
> bool missing_exclude_guest;
> };
>
> set it when the guest filtering fails and and check it
> instead of the hashmap__find call
>
> >
> > >
> > > my argument was following usecase:
> > >
> > > cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/
> > >
> > > that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
> > > but then I realized it's detection if pmu support :G and so if the :G is
> > > not there, none of the events should have it
> > >
> > > thoughts?
> >
> > I don't think I'm following well... ;-p
> >
> > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > may count something. Not sure if it's better to error out.
> > But the cycles:G and instructions:G should result in 0
> > in case there's no VM running.
>
> hm, I think if pmu doesn't support host/guest filtering then
> I think 'pmu/bla1/G' should error, no? better no number than
> bad number
>
Yes, it should in my opinion.

> jitka
>
> >
> > Thanks,
> > Namhyung
> >
>

2021-11-03 11:34:40

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Em Wed, Nov 03, 2021 at 12:44:12AM -0700, Stephane Eranian escreveu:
> On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
> > On Tue, Nov 02, 2021 at 04:21:21PM -0700, Namhyung Kim wrote:
> > > On Tue, Nov 2, 2021 at 7:10 AM Jiri Olsa <[email protected]> wrote:
> > > > we were discussing this with Arnaldo yesterday and he had an idea to use
> > > > evsel->pmu link to store this info instead of hash.. I first thought we
> > > > needed 'evsel' related data, but after I gave it some thought I think that
> > > > might actually work

> > > I don't get it.. do we have evsel->pmu already? Or do you want to add it?
> > > Yeah, the filtering facility (attr.exclude_*) should be kept in a PMU data
> > > not in the evsel. So I added a hashmap to find the pmu data from attr.type.
> > > How do I use evsel->pmu to store the info then?

> > evsel->pmu is not there yet (only evsel->pmu_name) so that
> > would need to be added.. we have evsel__find_pmu available

> > then the idea is to use evsel->pmu instead of the hasmap,
> > like add:

> > struct pmu {
> > ...
> > bool missing_exclude_guest;
> > };

yeah, or more generaly:

struct pmu {
...
struct {
bool exclude_guess;
} missing_features;
};

> > set it when the guest filtering fails and and check it
> > instead of the hashmap__find call

> > > > my argument was following usecase:

> > > > cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/

> > > > that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
> > > > but then I realized it's detection if pmu support :G and so if the :G is
> > > > not there, none of the events should have it

> > > > thoughts?

> > > I don't think I'm following well... ;-p

> > > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > > may count something. Not sure if it's better to error out.
> > > But the cycles:G and instructions:G should result in 0
> > > in case there's no VM running.

> > hm, I think if pmu doesn't support host/guest filtering then
> > I think 'pmu/bla1/G' should error, no? better no number than
> > bad number

> Yes, it should in my opinion.

Yeah, I thought about this yesterday (holiday here).

- Arnaldo

2021-11-03 17:37:01

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Wed, Nov 3, 2021 at 4:32 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
>
> Em Wed, Nov 03, 2021 at 12:44:12AM -0700, Stephane Eranian escreveu:
> > On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
> > > On Tue, Nov 02, 2021 at 04:21:21PM -0700, Namhyung Kim wrote:
> > > > On Tue, Nov 2, 2021 at 7:10 AM Jiri Olsa <[email protected]> wrote:
> > > > > we were discussing this with Arnaldo yesterday and he had an idea to use
> > > > > evsel->pmu link to store this info instead of hash.. I first thought we
> > > > > needed 'evsel' related data, but after I gave it some thought I think that
> > > > > might actually work
>
> > > > I don't get it.. do we have evsel->pmu already? Or do you want to add it?
> > > > Yeah, the filtering facility (attr.exclude_*) should be kept in a PMU data
> > > > not in the evsel. So I added a hashmap to find the pmu data from attr.type.
> > > > How do I use evsel->pmu to store the info then?
>
> > > evsel->pmu is not there yet (only evsel->pmu_name) so that
> > > would need to be added.. we have evsel__find_pmu available
>
> > > then the idea is to use evsel->pmu instead of the hasmap,
> > > like add:
>
> > > struct pmu {
> > > ...
> > > bool missing_exclude_guest;
> > > };
>
> yeah, or more generaly:
>
> struct pmu {
> ...
> struct {
> bool exclude_guess;
> } missing_features;
> };
>
> > > set it when the guest filtering fails and and check it
> > > instead of the hashmap__find call
>
> > > > > my argument was following usecase:
>
> > > > > cycles:G,instructions:G,pmu/bla1/:G,pmu/bla2/
>
> > > > > that we would falsely clear pmu/bla1/:G if we used the 'evsel->pmu' data..
> > > > > but then I realized it's detection if pmu support :G and so if the :G is
> > > > > not there, none of the events should have it
>
> > > > > thoughts?
>
> > > > I don't think I'm following well... ;-p
>
> > > > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > > > may count something. Not sure if it's better to error out.
> > > > But the cycles:G and instructions:G should result in 0
> > > > in case there's no VM running.
>
> > > hm, I think if pmu doesn't support host/guest filtering then
> > > I think 'pmu/bla1/G' should error, no? better no number than
> > > bad number
>
> > Yes, it should in my opinion.
>
> Yeah, I thought about this yesterday (holiday here).
>
Otherwise you create the illusion that you are monitoring in guest
mode when you are not.
The question is: how can the tool know which modifiers are supported
per pmu model?

2021-11-03 21:05:06

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Em Wed, Nov 03, 2021 at 10:35:04AM -0700, Stephane Eranian escreveu:
> On Wed, Nov 3, 2021 at 4:32 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > Em Wed, Nov 03, 2021 at 12:44:12AM -0700, Stephane Eranian escreveu:
> > > On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
> > > > > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > > > > may count something. Not sure if it's better to error out.
> > > > > But the cycles:G and instructions:G should result in 0
> > > > > in case there's no VM running.

> > > > hm, I think if pmu doesn't support host/guest filtering then
> > > > I think 'pmu/bla1/G' should error, no? better no number than
> > > > bad number

> > > Yes, it should in my opinion.

> > Yeah, I thought about this yesterday (holiday here).

> Otherwise you create the illusion that you are monitoring in guest
> mode when you are not.

> The question is: how can the tool know which modifiers are supported
> per pmu model?

As things stand kernel-wise, we should just do capability querying, i.e.
if the user asks for a feature not available for a specific PMU, we
should refuse and provide a helpful error message to the user.

If the PMUs in the kernel had some kind of mask that stated what of the
'struct perf_event_attr' selectable features are supported, then we
would just be able to avoid bothering the kernel asking for unsupported
stuff.

Just for exclude_guest we don't even need to have
evsel->pmu->missing_features.exclude_guest, as this is a hard error, no
point in caching previous capability queries.

- Arnaldo

2021-11-03 22:32:59

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Wed, Nov 3, 2021 at 2:03 PM Arnaldo Carvalho de Melo <[email protected]> wrote:
>
> Em Wed, Nov 03, 2021 at 10:35:04AM -0700, Stephane Eranian escreveu:
> > On Wed, Nov 3, 2021 at 4:32 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > Em Wed, Nov 03, 2021 at 12:44:12AM -0700, Stephane Eranian escreveu:
> > > > On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
> > > > > > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > > > > > may count something. Not sure if it's better to error out.
> > > > > > But the cycles:G and instructions:G should result in 0
> > > > > > in case there's no VM running.
>
> > > > > hm, I think if pmu doesn't support host/guest filtering then
> > > > > I think 'pmu/bla1/G' should error, no? better no number than
> > > > > bad number
>
> > > > Yes, it should in my opinion.
>
> > > Yeah, I thought about this yesterday (holiday here).
>
> > Otherwise you create the illusion that you are monitoring in guest
> > mode when you are not.
>
> > The question is: how can the tool know which modifiers are supported
> > per pmu model?
>
> As things stand kernel-wise, we should just do capability querying, i.e.
> if the user asks for a feature not available for a specific PMU, we
> should refuse and provide a helpful error message to the user.
>
> If the PMUs in the kernel had some kind of mask that stated what of the
> 'struct perf_event_attr' selectable features are supported, then we
> would just be able to avoid bothering the kernel asking for unsupported
> stuff.
>
I think we could add something like that in the sysfs entry for each
PMU instance.
that would avoid all these perf_event_open() calls and trying to
decipher the error
code.

> Just for exclude_guest we don't even need to have
> evsel->pmu->missing_features.exclude_guest, as this is a hard error, no
> point in caching previous capability queries.
>
> - Arnaldo

2021-11-04 18:49:04

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Em Wed, Nov 03, 2021 at 03:29:50PM -0700, Stephane Eranian escreveu:
> On Wed, Nov 3, 2021 at 2:03 PM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > Em Wed, Nov 03, 2021 at 10:35:04AM -0700, Stephane Eranian escreveu:
> > > On Wed, Nov 3, 2021 at 4:32 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > > Em Wed, Nov 03, 2021 at 12:44:12AM -0700, Stephane Eranian escreveu:
> > > > > On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
> > > > > > > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > > > > > > may count something. Not sure if it's better to error out.
> > > > > > > But the cycles:G and instructions:G should result in 0
> > > > > > > in case there's no VM running.

> > > > > > hm, I think if pmu doesn't support host/guest filtering then
> > > > > > I think 'pmu/bla1/G' should error, no? better no number than
> > > > > > bad number

> > > > > Yes, it should in my opinion.

> > > > Yeah, I thought about this yesterday (holiday here).

> > > Otherwise you create the illusion that you are monitoring in guest
> > > mode when you are not.

> > > The question is: how can the tool know which modifiers are supported
> > > per pmu model?

> > As things stand kernel-wise, we should just do capability querying, i.e.
> > if the user asks for a feature not available for a specific PMU, we
> > should refuse and provide a helpful error message to the user.

> > If the PMUs in the kernel had some kind of mask that stated what of the
> > 'struct perf_event_attr' selectable features are supported, then we
> > would just be able to avoid bothering the kernel asking for unsupported
> > stuff.

> I think we could add something like that in the sysfs entry for each
> PMU instance.
> that would avoid all these perf_event_open() calls and trying to
> decipher the error
> code.

That would speed up these checks with newer kernels, yeah, with older
kernels we'd fall back to what we have now + bailing out in the current
case (PMUs not supporting exclude_guest).

- Arnaldo

> > Just for exclude_guest we don't even need to have
> > evsel->pmu->missing_features.exclude_guest, as this is a hard error, no
> > point in caching previous capability queries.

2021-11-04 21:41:01

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Thu, Nov 4, 2021 at 10:40 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Em Wed, Nov 03, 2021 at 03:29:50PM -0700, Stephane Eranian escreveu:
> > On Wed, Nov 3, 2021 at 2:03 PM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > Em Wed, Nov 03, 2021 at 10:35:04AM -0700, Stephane Eranian escreveu:
> > > > On Wed, Nov 3, 2021 at 4:32 AM Arnaldo Carvalho de Melo <[email protected]> wrote:
> > > > > Em Wed, Nov 03, 2021 at 12:44:12AM -0700, Stephane Eranian escreveu:
> > > > > > On Wed, Nov 3, 2021 at 12:24 AM Jiri Olsa <[email protected]> wrote:
> > > > > > > > If the pmu doesn't support host/guest filtering, pmu/bla1/G
> > > > > > > > may count something. Not sure if it's better to error out.
> > > > > > > > But the cycles:G and instructions:G should result in 0
> > > > > > > > in case there's no VM running.
>
> > > > > > > hm, I think if pmu doesn't support host/guest filtering then
> > > > > > > I think 'pmu/bla1/G' should error, no? better no number than
> > > > > > > bad number
>
> > > > > > Yes, it should in my opinion.
>
> > > > > Yeah, I thought about this yesterday (holiday here).
>
> > > > Otherwise you create the illusion that you are monitoring in guest
> > > > mode when you are not.
>
> > > > The question is: how can the tool know which modifiers are supported
> > > > per pmu model?
>
> > > As things stand kernel-wise, we should just do capability querying, i.e.
> > > if the user asks for a feature not available for a specific PMU, we
> > > should refuse and provide a helpful error message to the user.
>
> > > If the PMUs in the kernel had some kind of mask that stated what of the
> > > 'struct perf_event_attr' selectable features are supported, then we
> > > would just be able to avoid bothering the kernel asking for unsupported
> > > stuff.
>
> > I think we could add something like that in the sysfs entry for each
> > PMU instance.
> > that would avoid all these perf_event_open() calls and trying to
> > decipher the error
> > code.
>
> That would speed up these checks with newer kernels, yeah, with older
> kernels we'd fall back to what we have now + bailing out in the current
> case (PMUs not supporting exclude_guest).

Agreed.

>
> > > Just for exclude_guest we don't even need to have
> > > evsel->pmu->missing_features.exclude_guest, as this is a hard error, no
> > > point in caching previous capability queries.

But we set attr.exclude_guest by default so we should check
if the user set the modifier explicitly or not.

With the explicit G modifier, it should fail immediately.
If it's implicit, then we should remove it from the attr
and proceed. So it's worth it to cache the result.

I think we don't need to fill evsel->pmu link unnecessarily.
I mean we can set it only if needed (for exclude_guest)
and keep a NULL pointer when it has no problem.

Thanks,
Namhyung

2021-11-05 21:29:11

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Hello,

On Wed, Nov 3, 2021 at 12:22 AM Ravi Bangoria <[email protected]> wrote:
>
> > The current logic for the perf missing feature has a bug that it can
> > wrongly clear some modifiers like G or H. Actually some PMUs don't
> > support any filtering or exclusion while others do. But we check it
> > as a global feature.
>
> (Sorry to pitch in bit late)
>
> AMD has one more problem on a similar line. On AMD, non-precise and
> precise sampling are provided by core and IBS pmu respectively. Plus,
> core pmu has filtering capability but IBS does not. Perf by default
> sets precise_ip=3 and exclude_guest=1 and goes on decreasing precise_ip
> with exclude_guest set until perf_event_open() succeeds. This is
> causing perf to always fallback to core pmu (non-precise mode) even if
> it's perfectly feasible to do precise sampling. Do you guys think this
> problem should also be addressed while designing solution for Namhyung's
> patch or solve it seperately like below patch:
>
> ---><---
>
> From 48808299679199c39ff737a30a7f387669314fd7 Mon Sep 17 00:00:00 2001
> From: Ravi Bangoria <[email protected]>
> Date: Tue, 2 Nov 2021 11:01:12 +0530
> Subject: [PATCH] perf/amd/ibs: Don't set exclude_guest by default
>
> Perf tool sets exclude_guest by default while calling perf_event_open().
> Because IBS does not have filtering capability, it always gets rejected
> by IBS PMU driver and thus perf falls back to non-precise sampling. Fix
> it by not setting exclude_guest by default on AMD.
>
> Before:
> $ sudo ./perf record -C 0 -vvv true |& grep precise
> precise_ip 3
> decreasing precise_ip by one (2)
> precise_ip 2
> decreasing precise_ip by one (1)
> precise_ip 1
> decreasing precise_ip by one (0)
>
> After:
> $ sudo ./perf record -C 0 -vvv true |& grep precise
> precise_ip 3
> decreasing precise_ip by one (2)
> precise_ip 2
>
> Reported-by: Kim Phillips <[email protected]>
> Signed-off-by: Ravi Bangoria <[email protected]>

It'd be nice if it can cover explicit -e cycles:pp as well. Anyway,

Acked-by: Namhyung Kim <[email protected]>

Thanks,
Namhyung


> ---
> tools/perf/arch/x86/util/evsel.c | 23 +++++++++++++++++++++++
> tools/perf/util/evsel.c | 12 +++++++-----
> tools/perf/util/evsel.h | 1 +
> 3 files changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
> index 2f733cdc8dbb..7841a49ce856 100644
> --- a/tools/perf/arch/x86/util/evsel.c
> +++ b/tools/perf/arch/x86/util/evsel.c
> @@ -1,8 +1,31 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <stdio.h>
> +#include <stdlib.h>
> #include "util/evsel.h"
> +#include "util/env.h"
> +#include "linux/string.h"
>
> void arch_evsel__set_sample_weight(struct evsel *evsel)
> {
> evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
> }
> +
> +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
> +{
> + struct perf_env env = {0};
> +
> + if (!perf_env__cpuid(&env))
> + return;
> +
> + /*
> + * On AMD, precise cycles event sampling internally uses IBS pmu.
> + * But IBS does not have filtering capabilities and perf by default
> + * sets exclude_guest = 1. This makes IBS pmu event init fail and
> + * thus perf ends up doing non-precise sampling. Avoid it by clearing
> + * exclude_guest.
> + */
> + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
> + attr->exclude_guest = 0;
> +
> + free(env.cpuid);
> +}
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..0b4267d4bb38 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -294,7 +294,7 @@ static bool perf_event_can_profile_kernel(void)
> return perf_event_paranoid_check(1);
> }
>
> -struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)
> +struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config)
> {
> struct perf_event_attr attr = {
> .type = type,
> @@ -305,18 +305,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)
>
> event_attr_init(&attr);
>
> - if (!precise)
> - goto new_event;
> -
> /*
> * Now let the usual logic to set up the perf_event_attr defaults
> * to kick in when we return and before perf_evsel__open() is called.
> */
> -new_event:
> evsel = evsel__new(&attr);
> if (evsel == NULL)
> goto out;
>
> + arch_evsel__fixup_new_cycles(&evsel->core.attr);
> +
> evsel->precise_max = true;
>
> /* use asprintf() because free(evsel) assumes name is allocated */
> @@ -1047,6 +1045,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
> evsel__set_sample_bit(evsel, WEIGHT);
> }
>
> +void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused)
> +{
> +}
> +
> /*
> * The enable_on_exec/disabled value strategy:
> *
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..764e54c6a23d 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -277,6 +277,7 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
> void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
>
> void arch_evsel__set_sample_weight(struct evsel *evsel);
> +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
>
> int evsel__set_filter(struct evsel *evsel, const char *filter);
> int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
> --
> 2.27.0
>

2021-11-07 07:36:14

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

Em Fri, Nov 05, 2021 at 11:00:29AM -0700, Namhyung Kim escreveu:
> Hello,
>
> On Wed, Nov 3, 2021 at 12:22 AM Ravi Bangoria <[email protected]> wrote:
> >
> > > The current logic for the perf missing feature has a bug that it can
> > > wrongly clear some modifiers like G or H. Actually some PMUs don't
> > > support any filtering or exclusion while others do. But we check it
> > > as a global feature.
> >
> > (Sorry to pitch in bit late)
> >
> > AMD has one more problem on a similar line. On AMD, non-precise and
> > precise sampling are provided by core and IBS pmu respectively. Plus,
> > core pmu has filtering capability but IBS does not. Perf by default
> > sets precise_ip=3 and exclude_guest=1 and goes on decreasing precise_ip
> > with exclude_guest set until perf_event_open() succeeds. This is
> > causing perf to always fallback to core pmu (non-precise mode) even if
> > it's perfectly feasible to do precise sampling. Do you guys think this
> > problem should also be addressed while designing solution for Namhyung's
> > patch or solve it seperately like below patch:
> >
> > ---><---
> >
> > From 48808299679199c39ff737a30a7f387669314fd7 Mon Sep 17 00:00:00 2001
> > From: Ravi Bangoria <[email protected]>
> > Date: Tue, 2 Nov 2021 11:01:12 +0530
> > Subject: [PATCH] perf/amd/ibs: Don't set exclude_guest by default
> >
> > Perf tool sets exclude_guest by default while calling perf_event_open().
> > Because IBS does not have filtering capability, it always gets rejected
> > by IBS PMU driver and thus perf falls back to non-precise sampling. Fix
> > it by not setting exclude_guest by default on AMD.
> >
> > Before:
> > $ sudo ./perf record -C 0 -vvv true |& grep precise
> > precise_ip 3
> > decreasing precise_ip by one (2)
> > precise_ip 2
> > decreasing precise_ip by one (1)
> > precise_ip 1
> > decreasing precise_ip by one (0)
> >
> > After:
> > $ sudo ./perf record -C 0 -vvv true |& grep precise
> > precise_ip 3
> > decreasing precise_ip by one (2)
> > precise_ip 2
> >
> > Reported-by: Kim Phillips <[email protected]>
> > Signed-off-by: Ravi Bangoria <[email protected]>
>
> It'd be nice if it can cover explicit -e cycles:pp as well. Anyway,

Ravi, please consider Namhyung's request, a patch on top as I'm adding
this already.

> Acked-by: Namhyung Kim <[email protected]>

Thanks, applied.

- Arnaldo

2021-11-07 18:00:43

by Ravi Bangoria

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting



On 07-Nov-21 12:54 AM, Arnaldo Carvalho de Melo wrote:
> Em Fri, Nov 05, 2021 at 11:00:29AM -0700, Namhyung Kim escreveu:
>> Hello,
>>
>> On Wed, Nov 3, 2021 at 12:22 AM Ravi Bangoria <[email protected]> wrote:
>>>
>>>> The current logic for the perf missing feature has a bug that it can
>>>> wrongly clear some modifiers like G or H. Actually some PMUs don't
>>>> support any filtering or exclusion while others do. But we check it
>>>> as a global feature.
>>>
>>> (Sorry to pitch in bit late)
>>>
>>> AMD has one more problem on a similar line. On AMD, non-precise and
>>> precise sampling are provided by core and IBS pmu respectively. Plus,
>>> core pmu has filtering capability but IBS does not. Perf by default
>>> sets precise_ip=3 and exclude_guest=1 and goes on decreasing precise_ip
>>> with exclude_guest set until perf_event_open() succeeds. This is
>>> causing perf to always fallback to core pmu (non-precise mode) even if
>>> it's perfectly feasible to do precise sampling. Do you guys think this
>>> problem should also be addressed while designing solution for Namhyung's
>>> patch or solve it seperately like below patch:
>>>
>>> ---><---
>>>
>>> From 48808299679199c39ff737a30a7f387669314fd7 Mon Sep 17 00:00:00 2001
>>> From: Ravi Bangoria <[email protected]>
>>> Date: Tue, 2 Nov 2021 11:01:12 +0530
>>> Subject: [PATCH] perf/amd/ibs: Don't set exclude_guest by default
>>>
>>> Perf tool sets exclude_guest by default while calling perf_event_open().
>>> Because IBS does not have filtering capability, it always gets rejected
>>> by IBS PMU driver and thus perf falls back to non-precise sampling. Fix
>>> it by not setting exclude_guest by default on AMD.
>>>
>>> Before:
>>> $ sudo ./perf record -C 0 -vvv true |& grep precise
>>> precise_ip 3
>>> decreasing precise_ip by one (2)
>>> precise_ip 2
>>> decreasing precise_ip by one (1)
>>> precise_ip 1
>>> decreasing precise_ip by one (0)
>>>
>>> After:
>>> $ sudo ./perf record -C 0 -vvv true |& grep precise
>>> precise_ip 3
>>> decreasing precise_ip by one (2)
>>> precise_ip 2
>>>
>>> Reported-by: Kim Phillips <[email protected]>
>>> Signed-off-by: Ravi Bangoria <[email protected]>
>>
>> It'd be nice if it can cover explicit -e cycles:pp as well. Anyway,
>
> Ravi, please consider Namhyung's request, a patch on top as I'm adding
> this already.

For explicit :pp modifier, evsel->precise_max does not get set and thus perf
does not try with different attr->precise_ip values while exclude_guest set.
So no issue with explicit :pp:

$ sudo ./perf record -C 0 -e cycles:pp -vvv |& grep "precise_ip\|exclude_guest"
precise_ip 2
exclude_guest 1
precise_ip 2
exclude_guest 1
switching off exclude_guest, exclude_host
precise_ip 2
^C

Also, with :P modifier, evsel->precise_max gets set but exclude_guest does
not and thus :P also works fine:

$ sudo ./perf record -C 0 -e cycles:P -vvv |& grep "precise_ip\|exclude_guest"
precise_ip 3
decreasing precise_ip by one (2)
precise_ip 2
^C

Thanks,
Ravi

2021-11-07 18:02:10

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting



On November 7, 2021 7:25:45 AM GMT-03:00, Ravi Bangoria <[email protected]> wrote:
>
>
>On 07-Nov-21 12:54 AM, Arnaldo Carvalho de Melo wrote:
>> Em Fri, Nov 05, 2021 at 11:00:29AM -0700, Namhyung Kim escreveu:
>>> Hello,
>>>
>>> On Wed, Nov 3, 2021 at 12:22 AM Ravi Bangoria <[email protected]> wrote:
>>>>
>>>>> The current logic for the perf missing feature has a bug that it can
>>>>> wrongly clear some modifiers like G or H. Actually some PMUs don't
>>>>> support any filtering or exclusion while others do. But we check it
>>>>> as a global feature.
>>>>
>>>> (Sorry to pitch in bit late)
>>>>
>>>> AMD has one more problem on a similar line. On AMD, non-precise and
>>>> precise sampling are provided by core and IBS pmu respectively. Plus,
>>>> core pmu has filtering capability but IBS does not. Perf by default
>>>> sets precise_ip=3 and exclude_guest=1 and goes on decreasing precise_ip
>>>> with exclude_guest set until perf_event_open() succeeds. This is
>>>> causing perf to always fallback to core pmu (non-precise mode) even if
>>>> it's perfectly feasible to do precise sampling. Do you guys think this
>>>> problem should also be addressed while designing solution for Namhyung's
>>>> patch or solve it seperately like below patch:
>>>>
>>>> ---><---
>>>>
>>>> From 48808299679199c39ff737a30a7f387669314fd7 Mon Sep 17 00:00:00 2001
>>>> From: Ravi Bangoria <[email protected]>
>>>> Date: Tue, 2 Nov 2021 11:01:12 +0530
>>>> Subject: [PATCH] perf/amd/ibs: Don't set exclude_guest by default
>>>>
>>>> Perf tool sets exclude_guest by default while calling perf_event_open().
>>>> Because IBS does not have filtering capability, it always gets rejected
>>>> by IBS PMU driver and thus perf falls back to non-precise sampling. Fix
>>>> it by not setting exclude_guest by default on AMD.
>>>>
>>>> Before:
>>>> $ sudo ./perf record -C 0 -vvv true |& grep precise
>>>> precise_ip 3
>>>> decreasing precise_ip by one (2)
>>>> precise_ip 2
>>>> decreasing precise_ip by one (1)
>>>> precise_ip 1
>>>> decreasing precise_ip by one (0)
>>>>
>>>> After:
>>>> $ sudo ./perf record -C 0 -vvv true |& grep precise
>>>> precise_ip 3
>>>> decreasing precise_ip by one (2)
>>>> precise_ip 2
>>>>
>>>> Reported-by: Kim Phillips <[email protected]>
>>>> Signed-off-by: Ravi Bangoria <[email protected]>
>>>
>>> It'd be nice if it can cover explicit -e cycles:pp as well. Anyway,
>>
>> Ravi, please consider Namhyung's request, a patch on top as I'm adding
>> this already.
>
>For explicit :pp modifier, evsel->precise_max does not get set and thus perf
>does not try with different attr->precise_ip values while exclude_guest set.
>So no issue with explicit :pp:
>
> $ sudo ./perf record -C 0 -e cycles:pp -vvv |& grep "precise_ip\|exclude_guest"
> precise_ip 2
> exclude_guest 1
> precise_ip 2
> exclude_guest 1
> switching off exclude_guest, exclude_host
> precise_ip 2
> ^C
>
>Also, with :P modifier, evsel->precise_max gets set but exclude_guest does
>not and thus :P also works fine:
>
> $ sudo ./perf record -C 0 -e cycles:P -vvv |& grep "precise_ip\|exclude_guest"
> precise_ip 3
> decreasing precise_ip by one (2)
> precise_ip 2
> ^C

Perfect, I'll add this analysis to the cset log message.

- Arnaldo
>
>Thanks,
>Ravi

2021-11-09 02:29:01

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v3] perf evsel: Fix missing exclude_{host,guest} setting

On Sun, Nov 7, 2021 at 2:26 AM Ravi Bangoria <[email protected]> wrote:
>
>
>
> On 07-Nov-21 12:54 AM, Arnaldo Carvalho de Melo wrote:
> > Em Fri, Nov 05, 2021 at 11:00:29AM -0700, Namhyung Kim escreveu:
> >> Hello,
> >>
> >> On Wed, Nov 3, 2021 at 12:22 AM Ravi Bangoria <[email protected]> wrote:
> >>>
> >>>> The current logic for the perf missing feature has a bug that it can
> >>>> wrongly clear some modifiers like G or H. Actually some PMUs don't
> >>>> support any filtering or exclusion while others do. But we check it
> >>>> as a global feature.
> >>>
> >>> (Sorry to pitch in bit late)
> >>>
> >>> AMD has one more problem on a similar line. On AMD, non-precise and
> >>> precise sampling are provided by core and IBS pmu respectively. Plus,
> >>> core pmu has filtering capability but IBS does not. Perf by default
> >>> sets precise_ip=3 and exclude_guest=1 and goes on decreasing precise_ip
> >>> with exclude_guest set until perf_event_open() succeeds. This is
> >>> causing perf to always fallback to core pmu (non-precise mode) even if
> >>> it's perfectly feasible to do precise sampling. Do you guys think this
> >>> problem should also be addressed while designing solution for Namhyung's
> >>> patch or solve it seperately like below patch:
> >>>
> >>> ---><---
> >>>
> >>> From 48808299679199c39ff737a30a7f387669314fd7 Mon Sep 17 00:00:00 2001
> >>> From: Ravi Bangoria <[email protected]>
> >>> Date: Tue, 2 Nov 2021 11:01:12 +0530
> >>> Subject: [PATCH] perf/amd/ibs: Don't set exclude_guest by default
> >>>
> >>> Perf tool sets exclude_guest by default while calling perf_event_open().
> >>> Because IBS does not have filtering capability, it always gets rejected
> >>> by IBS PMU driver and thus perf falls back to non-precise sampling. Fix
> >>> it by not setting exclude_guest by default on AMD.
> >>>
> >>> Before:
> >>> $ sudo ./perf record -C 0 -vvv true |& grep precise
> >>> precise_ip 3
> >>> decreasing precise_ip by one (2)
> >>> precise_ip 2
> >>> decreasing precise_ip by one (1)
> >>> precise_ip 1
> >>> decreasing precise_ip by one (0)
> >>>
> >>> After:
> >>> $ sudo ./perf record -C 0 -vvv true |& grep precise
> >>> precise_ip 3
> >>> decreasing precise_ip by one (2)
> >>> precise_ip 2
> >>>
> >>> Reported-by: Kim Phillips <[email protected]>
> >>> Signed-off-by: Ravi Bangoria <[email protected]>
> >>
> >> It'd be nice if it can cover explicit -e cycles:pp as well. Anyway,
> >
> > Ravi, please consider Namhyung's request, a patch on top as I'm adding
> > this already.
>
> For explicit :pp modifier, evsel->precise_max does not get set and thus perf
> does not try with different attr->precise_ip values while exclude_guest set.
> So no issue with explicit :pp:
>
> $ sudo ./perf record -C 0 -e cycles:pp -vvv |& grep "precise_ip\|exclude_guest"
> precise_ip 2
> exclude_guest 1
> precise_ip 2
> exclude_guest 1
> switching off exclude_guest, exclude_host
> precise_ip 2
> ^C
>
> Also, with :P modifier, evsel->precise_max gets set but exclude_guest does
> not and thus :P also works fine:
>
> $ sudo ./perf record -C 0 -e cycles:P -vvv |& grep "precise_ip\|exclude_guest"
> precise_ip 3
> decreasing precise_ip by one (2)
> precise_ip 2
> ^C

Ah, ok. Thanks for making it clear!

Thanks,
Namhyung