2022-09-21 23:06:19

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 1/2] perf: Use sample_flags for addr

Use the new sample_flags to indicate whether the addr field is filled by
the PMU driver. As most PMU drivers pass 0, it can set the flag only if
it has a non-zero value. And use 0 in perf_sample_output() if it's not
filled already.

Cc: Kan Liang <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
arch/x86/events/intel/ds.c | 8 ++++++--
include/linux/perf_event.h | 8 ++++++--
kernel/events/core.c | 5 +++++
3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 4ba6ab6d0d92..d2e9ff16f6ed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1621,8 +1621,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,


if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1) {
data->addr = pebs->dla;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }

if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
@@ -1783,8 +1785,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}

- if (sample_type & PERF_SAMPLE_ADDR_TYPE)
+ if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }

if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 368bdc4f563f..f4a13579b0e8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1028,7 +1028,6 @@ struct perf_sample_data {
* minimize the cachelines touched.
*/
u64 sample_flags;
- u64 addr;
struct perf_raw_record *raw;
u64 period;

@@ -1040,6 +1039,7 @@ struct perf_sample_data {
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
+ u64 addr;

u64 type;
u64 ip;
@@ -1079,9 +1079,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
{
/* remaining struct members initialized in perf_prepare_sample() */
data->sample_flags = 0;
- data->addr = addr;
data->raw = NULL;
data->period = period;
+
+ if (addr) {
+ data->addr = addr;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
}

/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c07e9a3ea94c..a91f74db9fe9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7414,6 +7414,11 @@ void perf_prepare_sample(struct perf_event_header *header,
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = 0;

+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
+ if (filtered_sample_type & PERF_SAMPLE_ADDR)
+ data->addr = 0;
+ }
+
if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);
--
2.37.3.968.ga6b4b080e4-goog


2022-09-22 15:00:33

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/2] perf: Use sample_flags for addr

On Wed, Sep 21, 2022 at 03:00:31PM -0700, Namhyung Kim wrote:
> Use the new sample_flags to indicate whether the addr field is filled by
> the PMU driver. As most PMU drivers pass 0, it can set the flag only if
> it has a non-zero value. And use 0 in perf_sample_output() if it's not
> filled already.

So no objection to the general idea; just a question

> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 368bdc4f563f..f4a13579b0e8 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1028,7 +1028,6 @@ struct perf_sample_data {
> * minimize the cachelines touched.
> */
> u64 sample_flags;
> - u64 addr;
> struct perf_raw_record *raw;
> u64 period;
>
> @@ -1040,6 +1039,7 @@ struct perf_sample_data {
> union perf_sample_weight weight;
> union perf_mem_data_src data_src;
> u64 txn;
> + u64 addr;
>
> u64 type;
> u64 ip;

Is there a reason you placed the variable where you did?

I'm thinking we should look at what perf-tool thinks is the common set
of SAMPLE flags and make sure those fields are grouped in as little
cachelines as possible.

Things like @ip and @type, which are basically *always* set, should
definitely be on top, no?


2022-09-22 16:39:11

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH 1/2] perf: Use sample_flags for addr

Hi Peter,

On Thu, Sep 22, 2022 at 7:48 AM Peter Zijlstra <[email protected]> wrote:
>
> On Wed, Sep 21, 2022 at 03:00:31PM -0700, Namhyung Kim wrote:
> > Use the new sample_flags to indicate whether the addr field is filled by
> > the PMU driver. As most PMU drivers pass 0, it can set the flag only if
> > it has a non-zero value. And use 0 in perf_sample_output() if it's not
> > filled already.
>
> So no objection to the general idea; just a question
>
> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > index 368bdc4f563f..f4a13579b0e8 100644
> > --- a/include/linux/perf_event.h
> > +++ b/include/linux/perf_event.h
> > @@ -1028,7 +1028,6 @@ struct perf_sample_data {
> > * minimize the cachelines touched.
> > */
> > u64 sample_flags;
> > - u64 addr;
> > struct perf_raw_record *raw;
> > u64 period;
> >
> > @@ -1040,6 +1039,7 @@ struct perf_sample_data {
> > union perf_sample_weight weight;
> > union perf_mem_data_src data_src;
> > u64 txn;
> > + u64 addr;
> >
> > u64 type;
> > u64 ip;
>
> Is there a reason you placed the variable where you did?

No I just followed the previous change.

>
> I'm thinking we should look at what perf-tool thinks is the common set
> of SAMPLE flags and make sure those fields are grouped in as little
> cachelines as possible.
>
> Things like @ip and @type, which are basically *always* set, should
> definitely be on top, no?

Yes, you're right. With this change we can move the optional fields
and group the common fields on top - like ip, period, pid and so on.

Will send a patch to do the move on top of this, ok?

Thanks,
Namhyung

2022-09-22 21:54:54

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH] perf: Change the layout of perf_sample_data

With recent change, it can set fields only if it's actually used.
Change the data layout so that it can have commonly used fields together
in a cache line boundary. The main user (the perf tools) sets the
IP, TID, TIME, PERIOD always. Also group relevant fields like addr,
phys_addr and data_page_size.

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
include/linux/perf_event.h | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e9b151cde491..8c16dae6e6bb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1023,25 +1023,13 @@ extern u64 perf_event_read_value(struct perf_event *event,


struct perf_sample_data {
- /*
- * Fields set by perf_sample_data_init(), group so as to
- * minimize the cachelines touched.
- */
u64 sample_flags;
u64 period;
-
+ u64 type;
/*
- * The other fields, optionally {set,used} by
- * perf_{prepare,output}_sample().
+ * Fields set commonly by perf tools, group so as to
+ * minimize the cachelines touched.
*/
- struct perf_branch_stack *br_stack;
- union perf_sample_weight weight;
- union perf_mem_data_src data_src;
- u64 txn;
- u64 addr;
- struct perf_raw_record *raw;
-
- u64 type;
u64 ip;
struct {
u32 pid;
@@ -1049,22 +1037,33 @@ struct perf_sample_data {
} tid_entry;
u64 time;
u64 id;
- u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
+
+ /*
+ * The other fields, optionally {set,used} by
+ * perf_{prepare,output}_sample().
+ */
struct perf_callchain_entry *callchain;
- u64 aux_size;
+ struct perf_raw_record *raw;
+ struct perf_branch_stack *br_stack;
+ union perf_sample_weight weight;
+ union perf_mem_data_src data_src;
+ u64 txn;

struct perf_regs regs_user;
struct perf_regs regs_intr;
u64 stack_user_size;

- u64 phys_addr;
+ u64 stream_id;
u64 cgroup;
+ u64 addr;
+ u64 phys_addr;
u64 data_page_size;
u64 code_page_size;
+ u64 aux_size;
} ____cacheline_aligned;

/* default value for data source */
--
2.37.3.998.g577e59143f-goog

2022-09-23 08:19:52

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 1/2] perf: Use sample_flags for addr

On Thu, Sep 22, 2022 at 09:32:06AM -0700, Namhyung Kim wrote:

> Will send a patch to do the move on top of this, ok?

Yes, thanks!

2022-09-28 08:17:24

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: perf/core] perf: Use sample_flags for addr

The following commit has been merged into the perf/core branch of tip:

Commit-ID: 7b084630153152239d84990ac4540c2dd360186f
Gitweb: https://git.kernel.org/tip/7b084630153152239d84990ac4540c2dd360186f
Author: Namhyung Kim <[email protected]>
AuthorDate: Wed, 21 Sep 2022 15:00:31 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Tue, 27 Sep 2022 22:50:24 +02:00

perf: Use sample_flags for addr

Use the new sample_flags to indicate whether the addr field is filled by
the PMU driver. As most PMU drivers pass 0, it can set the flag only if
it has a non-zero value. And use 0 in perf_sample_output() if it's not
filled already.

Signed-off-by: Namhyung Kim <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/intel/ds.c | 8 ++++++--
include/linux/perf_event.h | 8 ++++++--
kernel/events/core.c | 5 +++++
3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 4ba6ab6..d2e9ff1 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1621,8 +1621,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,


if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1) {
data->addr = pebs->dla;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }

if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
@@ -1783,8 +1785,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}

- if (sample_type & PERF_SAMPLE_ADDR_TYPE)
+ if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }

if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 368bdc4..f4a1357 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1028,7 +1028,6 @@ struct perf_sample_data {
* minimize the cachelines touched.
*/
u64 sample_flags;
- u64 addr;
struct perf_raw_record *raw;
u64 period;

@@ -1040,6 +1039,7 @@ struct perf_sample_data {
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
+ u64 addr;

u64 type;
u64 ip;
@@ -1079,9 +1079,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
{
/* remaining struct members initialized in perf_prepare_sample() */
data->sample_flags = 0;
- data->addr = addr;
data->raw = NULL;
data->period = period;
+
+ if (addr) {
+ data->addr = addr;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
}

/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c07e9a3..a91f74d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7414,6 +7414,11 @@ void perf_prepare_sample(struct perf_event_header *header,
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = 0;

+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
+ if (filtered_sample_type & PERF_SAMPLE_ADDR)
+ data->addr = 0;
+ }
+
if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);