2023-01-12 21:52:10

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 7/8] perf/core: Introduce perf_prepare_header()

Factor out perf_prepare_header() so that callers can invoke
perf_prepare_sample() without a header when one is not needed.

Also check filtered_sample_type in perf_prepare_sample() to avoid
duplicate work when it is called twice (or more).

Cc: [email protected]
Cc: [email protected]
Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
---
arch/s390/kernel/perf_cpum_sf.c | 3 ++-
arch/x86/events/intel/ds.c | 3 ++-
include/linux/perf_event.h | 16 +++++++++++++-
kernel/events/core.c | 38 +++++++++++++++++++++------------
4 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 332a49965130..fd02f8423243 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -671,7 +671,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
/* Protect callchain buffers, tasks */
rcu_read_lock();

- perf_prepare_sample(&header, data, event, regs);
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);
if (perf_output_begin(&handle, data, event, header.size))
goto out;

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 07c8a2cdc3ee..183efa914b99 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -807,7 +807,8 @@ int intel_pmu_drain_bts_buffer(void)
* the sample.
*/
rcu_read_lock();
- perf_prepare_sample(&header, &data, event, &regs);
+ perf_prepare_sample(&data, event, &regs);
+ perf_prepare_header(&header, &data, event, &regs);

if (perf_output_begin(&handle, &data, event,
header.size * (top - base - skip)))
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7db0e9cc2682..d5628a7b5eaa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1250,6 +1250,17 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}

+static inline u32 perf_sample_data_size(struct perf_sample_data *data,
+ struct perf_event *event)
+{
+ u32 size = sizeof(struct perf_event_header);
+
+ size += event->header_size + event->id_header_size;
+ size += data->dyn_size;
+
+ return size;
+}
+
/*
* Clear all bitfields in the perf_branch_entry.
* The to and from fields are not cleared because they are
@@ -1271,7 +1282,10 @@ extern void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event);
-extern void perf_prepare_sample(struct perf_event_header *header,
+extern void perf_prepare_sample(struct perf_sample_data *data,
+ struct perf_event *event,
+ struct pt_regs *regs);
+extern void perf_prepare_header(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 127dddd20f93..5c4f3fa3d2b7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7574,20 +7574,13 @@ static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d)
return d * !!(flags & s);
}

-void perf_prepare_sample(struct perf_event_header *header,
- struct perf_sample_data *data,
+void perf_prepare_sample(struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs)
{
u64 sample_type = event->attr.sample_type;
u64 filtered_sample_type;

- header->type = PERF_RECORD_SAMPLE;
- header->size = sizeof(*header) + event->header_size + event->id_header_size;
-
- header->misc = 0;
- header->misc |= perf_misc_flags(regs);
-
/*
* Add the sample flags that are dependent to others. And clear the
* sample flags that have already been done by the PMU driver.
@@ -7601,6 +7594,12 @@ void perf_prepare_sample(struct perf_event_header *header,
PERF_SAMPLE_REGS_USER);
filtered_sample_type &= ~data->sample_flags;

+ if (filtered_sample_type == 0) {
+ /* Make sure it has the correct data->type for output */
+ data->type = event->attr.sample_type;
+ return;
+ }
+
__perf_event_header__init_id(data, event, filtered_sample_type);
data->sample_flags |= sample_type & PERF_SAMPLE_ID_ALL;

@@ -7653,9 +7652,10 @@ void perf_prepare_sample(struct perf_event_header *header,
* up the rest of the sample size.
*/
u16 stack_size = event->attr.sample_stack_user;
+ u16 header_size = perf_sample_data_size(data, event);
u16 size = sizeof(u64);

- stack_size = perf_sample_ustack_size(stack_size, header->size,
+ stack_size = perf_sample_ustack_size(stack_size, header_size,
data->regs_user.regs);

/*
@@ -7740,8 +7740,9 @@ void perf_prepare_sample(struct perf_event_header *header,

if (filtered_sample_type & PERF_SAMPLE_AUX) {
u64 size;
+ u16 header_size = perf_sample_data_size(data, event);

- header->size += sizeof(u64); /* size */
+ header_size += sizeof(u64); /* size */

/*
* Given the 16bit nature of header::size, an AUX sample can
@@ -7749,17 +7750,25 @@ void perf_prepare_sample(struct perf_event_header *header,
* Make sure this doesn't happen by using up to U16_MAX bytes
* per sample in total (rounded down to 8 byte boundary).
*/
- size = min_t(size_t, U16_MAX - header->size,
+ size = min_t(size_t, U16_MAX - header_size,
event->attr.aux_sample_size);
size = rounddown(size, 8);
size = perf_prepare_sample_aux(event, data, size);

- WARN_ON_ONCE(size + header->size > U16_MAX);
+ WARN_ON_ONCE(size + header_size > U16_MAX);
data->dyn_size += size + sizeof(u64); /* size above */
data->sample_flags |= PERF_SAMPLE_AUX;
}
+}

- header->size += data->dyn_size;
+void perf_prepare_header(struct perf_event_header *header,
+ struct perf_sample_data *data,
+ struct perf_event *event,
+ struct pt_regs *regs)
+{
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = perf_sample_data_size(data, event);
+ header->misc = perf_misc_flags(regs);

/*
* If you're adding more sample types here, you likely need to do
@@ -7788,7 +7797,8 @@ __perf_event_output(struct perf_event *event,
/* protect the callchain buffers */
rcu_read_lock();

- perf_prepare_sample(&header, data, event, regs);
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);

err = output_begin(&handle, data, event, header.size);
if (err)
--
2.39.0.314.g84b9a713c41-goog


2023-01-13 22:04:35

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH 7/8] perf/core: Introduce perf_prepare_header()

On Thu, Jan 12, 2023 at 1:40 PM Namhyung Kim <[email protected]> wrote:
>
> Factor out perf_prepare_header() so that it can call
> perf_prepare_sample() without a header if not needed.
>
> Also it checks the filtered_sample_type to avoid duplicate
> work when perf_prepare_sample() is called twice (or more).
>
> Cc: [email protected]
> Cc: [email protected]
> Suggested-by: Peter Zijlstra <[email protected]>
> Signed-off-by: Namhyung Kim <[email protected]>

Acked-by: Song Liu <[email protected]>