2024-05-21 01:05:00

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 0/3] Use BPF filters for a "perf top -u" workaround

Allow uid and gid to be terms in BPF filters by first breaking the
connection between filter terms and PERF_SAMPLE_xx values. Calculate
the uid and gid using the bpf_get_current_uid_gid helper, rather than
from a value in the sample. Allow filters to be passed to perf top, this allows:

$ perf top -e cycles:P --filter "uid == $(id -u)"

to work as a "perf top -u" workaround, as "perf top -u" usually fails
due to processes/threads terminating between the /proc scan and the
perf_event_open.

v2. Allow PERF_SAMPLE_xx to be computed from the PBF_TERM_xx value
using a shift as requested by Namhyung.

Ian Rogers (3):
perf bpf filter: Give terms their own enum
perf bpf filter: Add uid and gid terms
perf top: Allow filters on events

tools/perf/Documentation/perf-record.txt | 2 +-
tools/perf/Documentation/perf-top.txt | 4 ++
tools/perf/builtin-top.c | 9 +++
tools/perf/util/bpf-filter.c | 33 ++++++----
tools/perf/util/bpf-filter.h | 5 +-
tools/perf/util/bpf-filter.l | 66 ++++++++++----------
tools/perf/util/bpf-filter.y | 7 ++-
tools/perf/util/bpf_skel/sample-filter.h | 61 +++++++++++++++++-
tools/perf/util/bpf_skel/sample_filter.bpf.c | 54 +++++++++++-----
9 files changed, 171 insertions(+), 70 deletions(-)

--
2.45.0.rc1.225.g2a3ae87e7f-goog



2024-05-21 01:05:19

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 1/3] perf bpf filter: Give terms their own enum

Give the term types their own enum so that additional terms can be
added that don't correspond to a PERF_SAMPLE_xx flag. The term values
are numerically ascending rather than bit field positions, this means
they need translating to a PERF_SAMPLE_xx bit field in certain places
using a shift.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/bpf-filter.c | 28 +++++----
tools/perf/util/bpf-filter.h | 5 +-
tools/perf/util/bpf-filter.l | 64 ++++++++++----------
tools/perf/util/bpf-filter.y | 7 ++-
tools/perf/util/bpf_skel/sample-filter.h | 58 +++++++++++++++++-
tools/perf/util/bpf_skel/sample_filter.bpf.c | 50 +++++++++------
6 files changed, 143 insertions(+), 69 deletions(-)

diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index b51544996046..f10148623a8e 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -17,11 +17,11 @@

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

-#define __PERF_SAMPLE_TYPE(st, opt) { st, #st, opt }
-#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PERF_SAMPLE_##_st, opt)
+#define __PERF_SAMPLE_TYPE(tt, st, opt) { tt, #st, opt }
+#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt)

static const struct perf_sample_info {
- u64 type;
+ enum perf_bpf_filter_term type;
const char *name;
const char *option;
} sample_table[] = {
@@ -44,12 +44,12 @@ static const struct perf_sample_info {
PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"),
};

-static const struct perf_sample_info *get_sample_info(u64 flags)
+static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type)
{
size_t i;

for (i = 0; i < ARRAY_SIZE(sample_table); i++) {
- if (sample_table[i].type == flags)
+ if (sample_table[i].type == type)
return &sample_table[i];
}
return NULL;
@@ -59,7 +59,8 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
{
const struct perf_sample_info *info;

- if (evsel->core.attr.sample_type & expr->sample_flags)
+ if (expr->term >= PBF_TERM_SAMPLE_START && expr->term <= PBF_TERM_SAMPLE_END &&
+ (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START))))
return 0;

if (expr->op == PBF_OP_GROUP_BEGIN) {
@@ -72,10 +73,10 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
return 0;
}

- info = get_sample_info(expr->sample_flags);
+ info = get_sample_info(expr->term);
if (info == NULL) {
- pr_err("Error: %s event does not have sample flags %lx\n",
- evsel__name(evsel), expr->sample_flags);
+ pr_err("Error: %s event does not have sample flags %d\n",
+ evsel__name(evsel), expr->term);
return -1;
}

@@ -105,7 +106,7 @@ int perf_bpf_filter__prepare(struct evsel *evsel)
struct perf_bpf_filter_entry entry = {
.op = expr->op,
.part = expr->part,
- .flags = expr->sample_flags,
+ .term = expr->term,
.value = expr->val,
};

@@ -122,7 +123,7 @@ int perf_bpf_filter__prepare(struct evsel *evsel)
struct perf_bpf_filter_entry group_entry = {
.op = group->op,
.part = group->part,
- .flags = group->sample_flags,
+ .term = group->term,
.value = group->val,
};
bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY);
@@ -173,7 +174,8 @@ u64 perf_bpf_filter__lost_count(struct evsel *evsel)
return skel ? skel->bss->dropped : 0;
}

-struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part,
+struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
+ int part,
enum perf_bpf_filter_op op,
unsigned long val)
{
@@ -181,7 +183,7 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flag

expr = malloc(sizeof(*expr));
if (expr != NULL) {
- expr->sample_flags = sample_flags;
+ expr->term = term;
expr->part = part;
expr->op = op;
expr->val = val;
diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h
index 7afd159411b8..cd6764442c16 100644
--- a/tools/perf/util/bpf-filter.h
+++ b/tools/perf/util/bpf-filter.h
@@ -11,14 +11,15 @@ struct perf_bpf_filter_expr {
struct list_head groups;
enum perf_bpf_filter_op op;
int part;
- unsigned long sample_flags;
+ enum perf_bpf_filter_term term;
unsigned long val;
};

struct evsel;

#ifdef HAVE_BPF_SKEL
-struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part,
+struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
+ int part,
enum perf_bpf_filter_op op,
unsigned long val);
int perf_bpf_filter__parse(struct list_head *expr_head, const char *str);
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index d4ff0f1345cd..62c959813466 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -9,16 +9,16 @@
#include "bpf-filter.h"
#include "bpf-filter-bison.h"

-static int sample(unsigned long sample_flag)
+static int sample(enum perf_bpf_filter_term term)
{
- perf_bpf_filter_lval.sample.type = sample_flag;
+ perf_bpf_filter_lval.sample.term = term;
perf_bpf_filter_lval.sample.part = 0;
return BFT_SAMPLE;
}

-static int sample_part(unsigned long sample_flag, int part)
+static int sample_part(enum perf_bpf_filter_term term, int part)
{
- perf_bpf_filter_lval.sample.type = sample_flag;
+ perf_bpf_filter_lval.sample.term = term;
perf_bpf_filter_lval.sample.part = part;
return BFT_SAMPLE;
}
@@ -67,34 +67,34 @@ ident [_a-zA-Z][_a-zA-Z0-9]+
{num_hex} { return value(16); }
{space} { }

-ip { return sample(PERF_SAMPLE_IP); }
-id { return sample(PERF_SAMPLE_ID); }
-tid { return sample(PERF_SAMPLE_TID); }
-pid { return sample_part(PERF_SAMPLE_TID, 1); }
-cpu { return sample(PERF_SAMPLE_CPU); }
-time { return sample(PERF_SAMPLE_TIME); }
-addr { return sample(PERF_SAMPLE_ADDR); }
-period { return sample(PERF_SAMPLE_PERIOD); }
-txn { return sample(PERF_SAMPLE_TRANSACTION); }
-weight { return sample(PERF_SAMPLE_WEIGHT); }
-weight1 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 1); }
-weight2 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); }
-weight3 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); }
-ins_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } /* alias for weight2 */
-p_stage_cyc { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */
-retire_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */
-phys_addr { return sample(PERF_SAMPLE_PHYS_ADDR); }
-code_pgsz { return sample(PERF_SAMPLE_CODE_PAGE_SIZE); }
-data_pgsz { return sample(PERF_SAMPLE_DATA_PAGE_SIZE); }
-mem_op { return sample_part(PERF_SAMPLE_DATA_SRC, 1); }
-mem_lvlnum { return sample_part(PERF_SAMPLE_DATA_SRC, 2); }
-mem_lvl { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } /* alias for mem_lvlnum */
-mem_snoop { return sample_part(PERF_SAMPLE_DATA_SRC, 3); } /* include snoopx */
-mem_remote { return sample_part(PERF_SAMPLE_DATA_SRC, 4); }
-mem_lock { return sample_part(PERF_SAMPLE_DATA_SRC, 5); }
-mem_dtlb { return sample_part(PERF_SAMPLE_DATA_SRC, 6); }
-mem_blk { return sample_part(PERF_SAMPLE_DATA_SRC, 7); }
-mem_hops { return sample_part(PERF_SAMPLE_DATA_SRC, 8); }
+ip { return sample(PBF_TERM_IP); }
+id { return sample(PBF_TERM_ID); }
+tid { return sample(PBF_TERM_TID); }
+pid { return sample_part(PBF_TERM_TID, 1); }
+cpu { return sample(PBF_TERM_CPU); }
+time { return sample(PBF_TERM_TIME); }
+addr { return sample(PBF_TERM_ADDR); }
+period { return sample(PBF_TERM_PERIOD); }
+txn { return sample(PBF_TERM_TRANSACTION); }
+weight { return sample(PBF_TERM_WEIGHT); }
+weight1 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 1); }
+weight2 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); }
+weight3 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); }
+ins_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } /* alias for weight2 */
+p_stage_cyc { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */
+retire_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */
+phys_addr { return sample(PBF_TERM_PHYS_ADDR); }
+code_pgsz { return sample(PBF_TERM_CODE_PAGE_SIZE); }
+data_pgsz { return sample(PBF_TERM_DATA_PAGE_SIZE); }
+mem_op { return sample_part(PBF_TERM_DATA_SRC, 1); }
+mem_lvlnum { return sample_part(PBF_TERM_DATA_SRC, 2); }
+mem_lvl { return sample_part(PBF_TERM_DATA_SRC, 2); } /* alias for mem_lvlnum */
+mem_snoop { return sample_part(PBF_TERM_DATA_SRC, 3); } /* include snoopx */
+mem_remote { return sample_part(PBF_TERM_DATA_SRC, 4); }
+mem_lock { return sample_part(PBF_TERM_DATA_SRC, 5); }
+mem_dtlb { return sample_part(PBF_TERM_DATA_SRC, 6); }
+mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); }
+mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); }

"==" { return operator(PBF_OP_EQ); }
"!=" { return operator(PBF_OP_NEQ); }
diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y
index 0e4d6de3c2ad..0c56fccb8874 100644
--- a/tools/perf/util/bpf-filter.y
+++ b/tools/perf/util/bpf-filter.y
@@ -27,7 +27,7 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
{
unsigned long num;
struct {
- unsigned long type;
+ enum perf_bpf_filter_term term;
int part;
} sample;
enum perf_bpf_filter_op op;
@@ -62,7 +62,8 @@ filter_term BFT_LOGICAL_OR filter_expr
if ($1->op == PBF_OP_GROUP_BEGIN) {
expr = $1;
} else {
- expr = perf_bpf_filter_expr__new(0, 0, PBF_OP_GROUP_BEGIN, 1);
+ expr = perf_bpf_filter_expr__new(PBF_TERM_NONE, /*part=*/0,
+ PBF_OP_GROUP_BEGIN, /*val=*/1);
list_add_tail(&$1->list, &expr->groups);
}
expr->val++;
@@ -78,7 +79,7 @@ filter_expr
filter_expr:
BFT_SAMPLE BFT_OP BFT_NUM
{
- $$ = perf_bpf_filter_expr__new($1.type, $1.part, $2, $3);
+ $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, $3);
}

%%
diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h
index 2e96e1ab084a..31bfab7e8d6c 100644
--- a/tools/perf/util/bpf_skel/sample-filter.h
+++ b/tools/perf/util/bpf_skel/sample-filter.h
@@ -1,6 +1,8 @@
#ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
#define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H

+#include <linux/perf_event.h>
+
#define MAX_FILTERS 64

/* supported filter operations */
@@ -16,12 +18,64 @@ enum perf_bpf_filter_op {
PBF_OP_GROUP_END,
};

+enum perf_bpf_filter_term {
+ /* No term is in use. */
+ PBF_TERM_NONE = 0,
+ /* Terms that correspond to PERF_SAMPLE_xx values. */
+ PBF_TERM_SAMPLE_START = PBF_TERM_NONE + 1,
+ PBF_TERM_IP = PBF_TERM_SAMPLE_START + 0, /* SAMPLE_IP = 1U << 0 */
+ PBF_TERM_TID = PBF_TERM_SAMPLE_START + 1, /* SAMPLE_TID = 1U << 1 */
+ PBF_TERM_TIME = PBF_TERM_SAMPLE_START + 2, /* SAMPLE_TIME = 1U << 2 */
+ PBF_TERM_ADDR = PBF_TERM_SAMPLE_START + 3, /* SAMPLE_ADDR = 1U << 3 */
+ __PBF_UNUSED_TERM4 = PBF_TERM_SAMPLE_START + 4, /* SAMPLE_READ = 1U << 4 */
+ __PBF_UNUSED_TERM5 = PBF_TERM_SAMPLE_START + 5, /* SAMPLE_CALLCHAIN = 1U << 5 */
+ PBF_TERM_ID = PBF_TERM_SAMPLE_START + 6, /* SAMPLE_ID = 1U << 6 */
+ PBF_TERM_CPU = PBF_TERM_SAMPLE_START + 7, /* SAMPLE_CPU = 1U << 7 */
+ PBF_TERM_PERIOD = PBF_TERM_SAMPLE_START + 8, /* SAMPLE_PERIOD = 1U << 8 */
+ __PBF_UNUSED_TERM9 = PBF_TERM_SAMPLE_START + 9, /* SAMPLE_STREAM_ID = 1U << 9 */
+ __PBF_UNUSED_TERM10 = PBF_TERM_SAMPLE_START + 10, /* SAMPLE_RAW = 1U << 10 */
+ __PBF_UNUSED_TERM11 = PBF_TERM_SAMPLE_START + 11, /* SAMPLE_BRANCH_STACK = 1U << 11 */
+ __PBF_UNUSED_TERM12 = PBF_TERM_SAMPLE_START + 12, /* SAMPLE_REGS_USER = 1U << 12 */
+ __PBF_UNUSED_TERM13 = PBF_TERM_SAMPLE_START + 13, /* SAMPLE_STACK_USER = 1U << 13 */
+ PBF_TERM_WEIGHT = PBF_TERM_SAMPLE_START + 14, /* SAMPLE_WEIGHT = 1U << 14 */
+ PBF_TERM_DATA_SRC = PBF_TERM_SAMPLE_START + 15, /* SAMPLE_DATA_SRC = 1U << 15 */
+ __PBF_UNUSED_TERM16 = PBF_TERM_SAMPLE_START + 16, /* SAMPLE_IDENTIFIER = 1U << 16 */
+ PBF_TERM_TRANSACTION = PBF_TERM_SAMPLE_START + 17, /* SAMPLE_TRANSACTION = 1U << 17 */
+ __PBF_UNUSED_TERM18 = PBF_TERM_SAMPLE_START + 18, /* SAMPLE_REGS_INTR = 1U << 18 */
+ PBF_TERM_PHYS_ADDR = PBF_TERM_SAMPLE_START + 19, /* SAMPLE_PHYS_ADDR = 1U << 19 */
+ __PBF_UNUSED_TERM20 = PBF_TERM_SAMPLE_START + 20, /* SAMPLE_AUX = 1U << 20 */
+ __PBF_UNUSED_TERM21 = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */
+ PBF_TERM_DATA_PAGE_SIZE = PBF_TERM_SAMPLE_START + 22, /* SAMPLE_DATA_PAGE_SIZE = 1U << 22 */
+ PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */
+ PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */
+ PBF_TERM_SAMPLE_END = PBF_TERM_WEIGHT_STRUCT,
+};
+
+#define BUILD_CHECK_SAMPLE(x) \
+ _Static_assert((1 << (PBF_TERM_##x - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_##x, \
+ "Mismatched PBF term to sample bit " #x)
+BUILD_CHECK_SAMPLE(IP);
+BUILD_CHECK_SAMPLE(TID);
+BUILD_CHECK_SAMPLE(TIME);
+BUILD_CHECK_SAMPLE(ADDR);
+BUILD_CHECK_SAMPLE(ID);
+BUILD_CHECK_SAMPLE(CPU);
+BUILD_CHECK_SAMPLE(PERIOD);
+BUILD_CHECK_SAMPLE(WEIGHT);
+BUILD_CHECK_SAMPLE(DATA_SRC);
+BUILD_CHECK_SAMPLE(TRANSACTION);
+BUILD_CHECK_SAMPLE(PHYS_ADDR);
+BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE);
+BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE);
+BUILD_CHECK_SAMPLE(WEIGHT_STRUCT);
+#undef BUILD_CHECK_SAMPLE
+
/* BPF map entry for filtering */
struct perf_bpf_filter_entry {
enum perf_bpf_filter_op op;
__u32 part; /* sub-sample type info when it has multiple values */
- __u64 flags; /* perf sample type flags */
+ enum perf_bpf_filter_term term;
__u64 value;
};

-#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */
\ No newline at end of file
+#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
index fb94f5280626..7bcf0c3de292 100644
--- a/tools/perf/util/bpf_skel/sample_filter.bpf.c
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -48,31 +48,35 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
{
struct perf_sample_data___new *data = (void *)kctx->data;

- if (!bpf_core_field_exists(data->sample_flags) ||
- (data->sample_flags & entry->flags) == 0)
+ if (!bpf_core_field_exists(data->sample_flags))
return 0;

- switch (entry->flags) {
- case PERF_SAMPLE_IP:
+ /* For sample terms check the sample bit is set. */
+ if (entry->term >= PBF_TERM_SAMPLE_START && entry->term <= PBF_TERM_SAMPLE_END &&
+ (data->sample_flags & (1 << (entry->term - PBF_TERM_SAMPLE_START))) == 0)
+ return 0;
+
+ switch (entry->term) {
+ case PBF_TERM_IP:
return kctx->data->ip;
- case PERF_SAMPLE_ID:
+ case PBF_TERM_ID:
return kctx->data->id;
- case PERF_SAMPLE_TID:
+ case PBF_TERM_TID:
if (entry->part)
return kctx->data->tid_entry.pid;
else
return kctx->data->tid_entry.tid;
- case PERF_SAMPLE_CPU:
+ case PBF_TERM_CPU:
return kctx->data->cpu_entry.cpu;
- case PERF_SAMPLE_TIME:
+ case PBF_TERM_TIME:
return kctx->data->time;
- case PERF_SAMPLE_ADDR:
+ case PBF_TERM_ADDR:
return kctx->data->addr;
- case PERF_SAMPLE_PERIOD:
+ case PBF_TERM_PERIOD:
return kctx->data->period;
- case PERF_SAMPLE_TRANSACTION:
+ case PBF_TERM_TRANSACTION:
return kctx->data->txn;
- case PERF_SAMPLE_WEIGHT_STRUCT:
+ case PBF_TERM_WEIGHT_STRUCT:
if (entry->part == 1)
return kctx->data->weight.var1_dw;
if (entry->part == 2)
@@ -80,15 +84,15 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
if (entry->part == 3)
return kctx->data->weight.var3_w;
/* fall through */
- case PERF_SAMPLE_WEIGHT:
+ case PBF_TERM_WEIGHT:
return kctx->data->weight.full;
- case PERF_SAMPLE_PHYS_ADDR:
+ case PBF_TERM_PHYS_ADDR:
return kctx->data->phys_addr;
- case PERF_SAMPLE_CODE_PAGE_SIZE:
+ case PBF_TERM_CODE_PAGE_SIZE:
return kctx->data->code_page_size;
- case PERF_SAMPLE_DATA_PAGE_SIZE:
+ case PBF_TERM_DATA_PAGE_SIZE:
return kctx->data->data_page_size;
- case PERF_SAMPLE_DATA_SRC:
+ case PBF_TERM_DATA_SRC:
if (entry->part == 1)
return kctx->data->data_src.mem_op;
if (entry->part == 2)
@@ -117,6 +121,18 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
}
/* return the whole word */
return kctx->data->data_src.val;
+ case PBF_TERM_NONE:
+ case __PBF_UNUSED_TERM4:
+ case __PBF_UNUSED_TERM5:
+ case __PBF_UNUSED_TERM9:
+ case __PBF_UNUSED_TERM10:
+ case __PBF_UNUSED_TERM11:
+ case __PBF_UNUSED_TERM12:
+ case __PBF_UNUSED_TERM13:
+ case __PBF_UNUSED_TERM16:
+ case __PBF_UNUSED_TERM18:
+ case __PBF_UNUSED_TERM20:
+ case __PBF_UNUSED_TERM21:
default:
break;
}
--
2.45.0.rc1.225.g2a3ae87e7f-goog


2024-05-21 01:05:23

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 2/3] perf bpf filter: Add uid and gid terms

Allow the BPF filter to use the uid and gid terms determined by the
bpf_get_current_uid_gid BPF helper. For example, the following will
record the cpu-clock event system wide discarding samples that don't
belong to the current user.

$ perf record -e cpu-clock --filter "uid == $(id -u)" -a sleep 0.1

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/Documentation/perf-record.txt | 2 +-
tools/perf/util/bpf-filter.c | 5 +++++
tools/perf/util/bpf-filter.l | 2 ++
tools/perf/util/bpf_skel/sample-filter.h | 3 +++
tools/perf/util/bpf_skel/sample_filter.bpf.c | 4 ++++
5 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 6015fdd08fb6..059bc40c5ee1 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -200,7 +200,7 @@ OPTIONS
ip, id, tid, pid, cpu, time, addr, period, txn, weight, phys_addr,
code_pgsz, data_pgsz, weight1, weight2, weight3, ins_lat, retire_lat,
p_stage_cyc, mem_op, mem_lvl, mem_snoop, mem_remote, mem_lock,
- mem_dtlb, mem_blk, mem_hops
+ mem_dtlb, mem_blk, mem_hops, uid, gid

The <operator> can be one of:
==, !=, >, >=, <, <=, &
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index f10148623a8e..04f98b6bb291 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -63,6 +63,11 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
(evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START))))
return 0;

+ if (expr->term == PBF_TERM_UID || expr->term == PBF_TERM_GID) {
+ /* Not dependent on the sample_type as computed from a BPF helper. */
+ return 0;
+ }
+
if (expr->op == PBF_OP_GROUP_BEGIN) {
struct perf_bpf_filter_expr *group;

diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index 62c959813466..2a7c839f3fae 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -95,6 +95,8 @@ mem_lock { return sample_part(PBF_TERM_DATA_SRC, 5); }
mem_dtlb { return sample_part(PBF_TERM_DATA_SRC, 6); }
mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); }
mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); }
+uid { return sample(PBF_TERM_UID); }
+gid { return sample(PBF_TERM_GID); }

"==" { return operator(PBF_OP_EQ); }
"!=" { return operator(PBF_OP_NEQ); }
diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h
index 31bfab7e8d6c..dd98bd1a2669 100644
--- a/tools/perf/util/bpf_skel/sample-filter.h
+++ b/tools/perf/util/bpf_skel/sample-filter.h
@@ -49,6 +49,9 @@ enum perf_bpf_filter_term {
PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */
PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */
PBF_TERM_SAMPLE_END = PBF_TERM_WEIGHT_STRUCT,
+ /* Terms computed from BPF helpers. */
+ PBF_TERM_UID,
+ PBF_TERM_GID,
};

#define BUILD_CHECK_SAMPLE(x) \
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
index 7bcf0c3de292..1fc319ec5e3f 100644
--- a/tools/perf/util/bpf_skel/sample_filter.bpf.c
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -121,6 +121,10 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
}
/* return the whole word */
return kctx->data->data_src.val;
+ case PBF_TERM_UID:
+ return bpf_get_current_uid_gid() & 0xFFFFFFFF;
+ case PBF_TERM_GID:
+ return bpf_get_current_uid_gid() >> 32;
case PBF_TERM_NONE:
case __PBF_UNUSED_TERM4:
case __PBF_UNUSED_TERM5:
--
2.45.0.rc1.225.g2a3ae87e7f-goog


2024-05-21 01:05:39

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 3/3] perf top: Allow filters on events

Allow filters to be added to perf top events. One use is to workaround
issues with:
```
$ perf top --uid="$(id -u)"
```
which tries to scan /proc find processes belonging to the uid and can
fail in such a pid terminates between the scan and the
perf_event_open reporting:
```
Error:
The sys_perf_event_open() syscall returned with 3 (No such process) for event (cycles:P).
/bin/dmesg | grep -i perf may provide additional information.
```
A similar filter:
```
$ perf top -e cycles:P --filter "uid == $(id -u)"
```
doesn't fail this way.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/Documentation/perf-top.txt | 4 ++++
tools/perf/builtin-top.c | 9 +++++++++
2 files changed, 13 insertions(+)

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index a754875fa5bb..667e5102075e 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -43,6 +43,10 @@ Default is to monitor all CPUS.
encoding with the layout of the event control registers as described
by entries in /sys/bus/event_source/devices/cpu/format/*.

+--filter=<filter>::
+ Event filter. This option should follow an event selector (-e). For
+ syntax see linkperf:perf-record[1].
+
-E <entries>::
--entries=<entries>::
Display this many functions.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1d6aef51c122..e8cbbf10d361 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1055,6 +1055,13 @@ static int perf_top__start_counters(struct perf_top *top)
}
}

+ if (evlist__apply_filters(evlist, &counter)) {
+ pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
+ counter->filter ?: "BPF", evsel__name(counter), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ goto out_err;
+ }
+
if (evlist__mmap(evlist, opts->mmap_pages) < 0) {
ui__error("Failed to mmap with %d (%s)\n",
errno, str_error_r(errno, msg, sizeof(msg)));
@@ -1462,6 +1469,8 @@ int cmd_top(int argc, const char **argv)
OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
+ OPT_CALLBACK(0, "filter", &top.evlist, "filter",
+ "event filter", parse_filter),
OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
OPT_STRING('p', "pid", &target->pid, "pid",
"profile events on existing process id"),
--
2.45.0.rc1.225.g2a3ae87e7f-goog


2024-05-23 04:39:42

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v2 0/3] Use BPF filters for a "perf top -u" workaround

On Mon, May 20, 2024 at 6:04 PM Ian Rogers <[email protected]> wrote:
>
> Allow uid and gid to be terms in BPF filters by first breaking the
> connection between filter terms and PERF_SAMPLE_xx values. Calculate
> the uid and gid using the bpf_get_current_uid_gid helper, rather than
> from a value in the sample. Allow filters to be passed to perf top, this allows:
>
> $ perf top -e cycles:P --filter "uid == $(id -u)"
>
> to work as a "perf top -u" workaround, as "perf top -u" usually fails
> due to processes/threads terminating between the /proc scan and the
> perf_event_open.
>
> v2. Allow PERF_SAMPLE_xx to be computed from the PBF_TERM_xx value
> using a shift as requested by Namhyung.
>
> Ian Rogers (3):
> perf bpf filter: Give terms their own enum
> perf bpf filter: Add uid and gid terms
> perf top: Allow filters on events

Acked-by: Namhyung Kim <[email protected]>

Thanks,
Namhyung

>
> tools/perf/Documentation/perf-record.txt | 2 +-
> tools/perf/Documentation/perf-top.txt | 4 ++
> tools/perf/builtin-top.c | 9 +++
> tools/perf/util/bpf-filter.c | 33 ++++++----
> tools/perf/util/bpf-filter.h | 5 +-
> tools/perf/util/bpf-filter.l | 66 ++++++++++----------
> tools/perf/util/bpf-filter.y | 7 ++-
> tools/perf/util/bpf_skel/sample-filter.h | 61 +++++++++++++++++-
> tools/perf/util/bpf_skel/sample_filter.bpf.c | 54 +++++++++++-----
> 9 files changed, 171 insertions(+), 70 deletions(-)
>
> --
> 2.45.0.rc1.225.g2a3ae87e7f-goog
>

2024-05-24 17:58:18

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v2 0/3] Use BPF filters for a "perf top -u" workaround

On Wed, May 22, 2024 at 09:38:57PM -0700, Namhyung Kim wrote:
> On Mon, May 20, 2024 at 6:04 PM Ian Rogers <[email protected]> wrote:
> >
> > Allow uid and gid to be terms in BPF filters by first breaking the
> > connection between filter terms and PERF_SAMPLE_xx values. Calculate
> > the uid and gid using the bpf_get_current_uid_gid helper, rather than
> > from a value in the sample. Allow filters to be passed to perf top, this allows:
> >
> > $ perf top -e cycles:P --filter "uid == $(id -u)"
> >
> > to work as a "perf top -u" workaround, as "perf top -u" usually fails
> > due to processes/threads terminating between the /proc scan and the
> > perf_event_open.
> >
> > v2. Allow PERF_SAMPLE_xx to be computed from the PBF_TERM_xx value
> > using a shift as requested by Namhyung.
> >
> > Ian Rogers (3):
> > perf bpf filter: Give terms their own enum
> > perf bpf filter: Add uid and gid terms
> > perf top: Allow filters on events

Hmm.. I'm seeing this.

$ make build-test
...
cd . && make GEN_VMLINUX_H=1 FEATURES_DUMP=/home/namhyung/project/linux/tools/perf/BUILD_TEST_FEATURE_DUMP -j64 O=/tmp/tmp.EeXFOfLPt5 DESTDIR=/tmp/tmp.Y0eiZKvc9D
...
CLANG /tmp/tmp.EeXFOfLPt5/util/bpf_skel/.tmp/sample_filter.bpf.o
In file included from util/bpf_skel/sample_filter.bpf.c:8:
In file included from util/bpf_skel/sample-filter.h:4:
/home/namhyung/project/linux/tools/include/uapi/linux/perf_event.h:29:6: error: redefinition of 'perf_type_id'
enum perf_type_id {
^
/tmp/tmp.EeXFOfLPt5/util/bpf_skel/.tmp/../vmlinux.h:54086:6: note: previous definition is here
enum perf_type_id {
^
In file included from util/bpf_skel/sample_filter.bpf.c:8:
In file included from util/bpf_skel/sample-filter.h:4:
/home/namhyung/project/linux/tools/include/uapi/linux/perf_event.h:30:2: error: redefinition of enumerator 'PERF_TYPE_HARDWARE'
PERF_TYPE_HARDWARE = 0,
^
/tmp/tmp.EeXFOfLPt5/util/bpf_skel/.tmp/../vmlinux.h:54087:2: note: previous definition is here
PERF_TYPE_HARDWARE = 0,
^
...
make[3]: *** [Makefile.perf:264: sub-make] Error 2
make[2]: *** [Makefile:70: all] Error 2
make[1]: *** [tests/make:340: make_gen_vmlinux_h_O] Error 1
make: *** [Makefile:103: build-test] Error 2

Thanks,
Namhyung


2024-05-24 20:23:10

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v2 0/3] Use BPF filters for a "perf top -u" workaround

On Fri, May 24, 2024 at 10:58 AM Namhyung Kim <[email protected]> wrote:
>
> On Wed, May 22, 2024 at 09:38:57PM -0700, Namhyung Kim wrote:
> > On Mon, May 20, 2024 at 6:04 PM Ian Rogers <[email protected]> wrote:
> > >
> > > Allow uid and gid to be terms in BPF filters by first breaking the
> > > connection between filter terms and PERF_SAMPLE_xx values. Calculate
> > > the uid and gid using the bpf_get_current_uid_gid helper, rather than
> > > from a value in the sample. Allow filters to be passed to perf top, this allows:
> > >
> > > $ perf top -e cycles:P --filter "uid == $(id -u)"
> > >
> > > to work as a "perf top -u" workaround, as "perf top -u" usually fails
> > > due to processes/threads terminating between the /proc scan and the
> > > perf_event_open.
> > >
> > > v2. Allow PERF_SAMPLE_xx to be computed from the PBF_TERM_xx value
> > > using a shift as requested by Namhyung.
> > >
> > > Ian Rogers (3):
> > > perf bpf filter: Give terms their own enum
> > > perf bpf filter: Add uid and gid terms
> > > perf top: Allow filters on events
>
> Hmm.. I'm seeing this.
>
> $ make build-test
> ...
> cd . && make GEN_VMLINUX_H=1 FEATURES_DUMP=/home/namhyung/project/linux/tools/perf/BUILD_TEST_FEATURE_DUMP -j64 O=/tmp/tmp.EeXFOfLPt5 DESTDIR=/tmp/tmp.Y0eiZKvc9D
> ...
> CLANG /tmp/tmp.EeXFOfLPt5/util/bpf_skel/.tmp/sample_filter.bpf.o
> In file included from util/bpf_skel/sample_filter.bpf.c:8:
> In file included from util/bpf_skel/sample-filter.h:4:
> /home/namhyung/project/linux/tools/include/uapi/linux/perf_event.h:29:6: error: redefinition of 'perf_type_id'
> enum perf_type_id {
> ^
> /tmp/tmp.EeXFOfLPt5/util/bpf_skel/.tmp/../vmlinux.h:54086:6: note: previous definition is here
> enum perf_type_id {
> ^
> In file included from util/bpf_skel/sample_filter.bpf.c:8:
> In file included from util/bpf_skel/sample-filter.h:4:
> /home/namhyung/project/linux/tools/include/uapi/linux/perf_event.h:30:2: error: redefinition of enumerator 'PERF_TYPE_HARDWARE'
> PERF_TYPE_HARDWARE = 0,
> ^
> /tmp/tmp.EeXFOfLPt5/util/bpf_skel/.tmp/../vmlinux.h:54087:2: note: previous definition is here
> PERF_TYPE_HARDWARE = 0,
> ^
> ...
> make[3]: *** [Makefile.perf:264: sub-make] Error 2
> make[2]: *** [Makefile:70: all] Error 2
> make[1]: *** [tests/make:340: make_gen_vmlinux_h_O] Error 1
> make: *** [Makefile:103: build-test] Error 2

IIRC this only exists for the sake of asserts. I can move the asserts
to placate the generated vmlinux.h. I'll send a v3.

Thanks,
Ian

> Thanks,
> Namhyung
>