Use the new sample_flags to indicate whether the addr field is filled by
the PMU driver. As most PMU drivers pass 0, it can set the flag only if
it has a non-zero value. And use 0 in perf_sample_output() if it's not
filled already.
Cc: Kan Liang <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
arch/x86/events/intel/ds.c | 8 ++++++--
include/linux/perf_event.h | 8 ++++++--
kernel/events/core.c | 5 +++++
3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 4ba6ab6d0d92..d2e9ff16f6ed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1621,8 +1621,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1) {
data->addr = pebs->dla;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
@@ -1783,8 +1785,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}
- if (sample_type & PERF_SAMPLE_ADDR_TYPE)
+ if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 368bdc4f563f..f4a13579b0e8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1028,7 +1028,6 @@ struct perf_sample_data {
* minimize the cachelines touched.
*/
u64 sample_flags;
- u64 addr;
struct perf_raw_record *raw;
u64 period;
@@ -1040,6 +1039,7 @@ struct perf_sample_data {
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
+ u64 addr;
u64 type;
u64 ip;
@@ -1079,9 +1079,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
{
/* remaining struct members initialized in perf_prepare_sample() */
data->sample_flags = 0;
- data->addr = addr;
data->raw = NULL;
data->period = period;
+
+ if (addr) {
+ data->addr = addr;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
}
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c07e9a3ea94c..a91f74db9fe9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7414,6 +7414,11 @@ void perf_prepare_sample(struct perf_event_header *header,
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = 0;
+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
+ if (filtered_sample_type & PERF_SAMPLE_ADDR)
+ data->addr = 0;
+ }
+
if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);
--
2.37.3.968.ga6b4b080e4-goog
On Wed, Sep 21, 2022 at 03:00:31PM -0700, Namhyung Kim wrote:
> Use the new sample_flags to indicate whether the addr field is filled by
> the PMU driver. As most PMU drivers pass 0, it can set the flag only if
> it has a non-zero value. And use 0 in perf_sample_output() if it's not
> filled already.
So no objection to the general idea; just a question
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 368bdc4f563f..f4a13579b0e8 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1028,7 +1028,6 @@ struct perf_sample_data {
> * minimize the cachelines touched.
> */
> u64 sample_flags;
> - u64 addr;
> struct perf_raw_record *raw;
> u64 period;
>
> @@ -1040,6 +1039,7 @@ struct perf_sample_data {
> union perf_sample_weight weight;
> union perf_mem_data_src data_src;
> u64 txn;
> + u64 addr;
>
> u64 type;
> u64 ip;
Is there a reason you placed the variable where you did?
I'm thinking we should look at what perf-tool thinks is the common set
of SAMPLE flags and make sure those fields are grouped in as little
cachelines as possible.
Things like @ip and @type, which are basically *always* set, should
definitely be on top, no?
Hi Peter,
On Thu, Sep 22, 2022 at 7:48 AM Peter Zijlstra <[email protected]> wrote:
>
> On Wed, Sep 21, 2022 at 03:00:31PM -0700, Namhyung Kim wrote:
> > Use the new sample_flags to indicate whether the addr field is filled by
> > the PMU driver. As most PMU drivers pass 0, it can set the flag only if
> > it has a non-zero value. And use 0 in perf_sample_output() if it's not
> > filled already.
>
> So no objection to the general idea; just a question
>
> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > index 368bdc4f563f..f4a13579b0e8 100644
> > --- a/include/linux/perf_event.h
> > +++ b/include/linux/perf_event.h
> > @@ -1028,7 +1028,6 @@ struct perf_sample_data {
> > * minimize the cachelines touched.
> > */
> > u64 sample_flags;
> > - u64 addr;
> > struct perf_raw_record *raw;
> > u64 period;
> >
> > @@ -1040,6 +1039,7 @@ struct perf_sample_data {
> > union perf_sample_weight weight;
> > union perf_mem_data_src data_src;
> > u64 txn;
> > + u64 addr;
> >
> > u64 type;
> > u64 ip;
>
> Is there a reason you placed the variable where you did?
No I just followed the previous change.
>
> I'm thinking we should look at what perf-tool thinks is the common set
> of SAMPLE flags and make sure those fields are grouped in as little
> cachelines as possible.
>
> Things like @ip and @type, which are basically *always* set, should
> definitely be on top, no?
Yes, you're right. With this change we can move the optional fields
and group the common fields on top - like ip, period, pid and so on.
Will send a patch to do the move on top of this, ok?
Thanks,
Namhyung
With recent change, it can set fields only if it's actually used.
Change the data layout so that it can have commonly used fields together
in a cache line boundary. The main user (the perf tools) sets the
IP, TID, TIME, PERIOD always. Also group relevant fields like addr,
phys_addr and data_page_size.
Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
include/linux/perf_event.h | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e9b151cde491..8c16dae6e6bb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1023,25 +1023,13 @@ extern u64 perf_event_read_value(struct perf_event *event,
struct perf_sample_data {
- /*
- * Fields set by perf_sample_data_init(), group so as to
- * minimize the cachelines touched.
- */
u64 sample_flags;
u64 period;
-
+ u64 type;
/*
- * The other fields, optionally {set,used} by
- * perf_{prepare,output}_sample().
+ * Fields set commonly by perf tools, group so as to
+ * minimize the cachelines touched.
*/
- struct perf_branch_stack *br_stack;
- union perf_sample_weight weight;
- union perf_mem_data_src data_src;
- u64 txn;
- u64 addr;
- struct perf_raw_record *raw;
-
- u64 type;
u64 ip;
struct {
u32 pid;
@@ -1049,22 +1037,33 @@ struct perf_sample_data {
} tid_entry;
u64 time;
u64 id;
- u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
+
+ /*
+ * The other fields, optionally {set,used} by
+ * perf_{prepare,output}_sample().
+ */
struct perf_callchain_entry *callchain;
- u64 aux_size;
+ struct perf_raw_record *raw;
+ struct perf_branch_stack *br_stack;
+ union perf_sample_weight weight;
+ union perf_mem_data_src data_src;
+ u64 txn;
struct perf_regs regs_user;
struct perf_regs regs_intr;
u64 stack_user_size;
- u64 phys_addr;
+ u64 stream_id;
u64 cgroup;
+ u64 addr;
+ u64 phys_addr;
u64 data_page_size;
u64 code_page_size;
+ u64 aux_size;
} ____cacheline_aligned;
/* default value for data source */
--
2.37.3.998.g577e59143f-goog
On Thu, Sep 22, 2022 at 09:32:06AM -0700, Namhyung Kim wrote:
> Will send a patch to do the move on top of this, ok?
Yes, thanks!
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 7b084630153152239d84990ac4540c2dd360186f
Gitweb: https://git.kernel.org/tip/7b084630153152239d84990ac4540c2dd360186f
Author: Namhyung Kim <[email protected]>
AuthorDate: Wed, 21 Sep 2022 15:00:31 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Tue, 27 Sep 2022 22:50:24 +02:00
perf: Use sample_flags for addr
Use the new sample_flags to indicate whether the addr field is filled by
the PMU driver. As most PMU drivers pass 0, it can set the flag only if
it has a non-zero value. And use 0 in perf_sample_output() if it's not
filled already.
Signed-off-by: Namhyung Kim <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/intel/ds.c | 8 ++++++--
include/linux/perf_event.h | 8 ++++++--
kernel/events/core.c | 5 +++++
3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 4ba6ab6..d2e9ff1 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1621,8 +1621,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1) {
data->addr = pebs->dla;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
@@ -1783,8 +1785,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}
- if (sample_type & PERF_SAMPLE_ADDR_TYPE)
+ if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 368bdc4..f4a1357 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1028,7 +1028,6 @@ struct perf_sample_data {
* minimize the cachelines touched.
*/
u64 sample_flags;
- u64 addr;
struct perf_raw_record *raw;
u64 period;
@@ -1040,6 +1039,7 @@ struct perf_sample_data {
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
+ u64 addr;
u64 type;
u64 ip;
@@ -1079,9 +1079,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
{
/* remaining struct members initialized in perf_prepare_sample() */
data->sample_flags = 0;
- data->addr = addr;
data->raw = NULL;
data->period = period;
+
+ if (addr) {
+ data->addr = addr;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
}
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c07e9a3..a91f74d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7414,6 +7414,11 @@ void perf_prepare_sample(struct perf_event_header *header,
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = 0;
+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
+ if (filtered_sample_type & PERF_SAMPLE_ADDR)
+ data->addr = 0;
+ }
+
if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);