2023-06-20 16:13:38

by Yang Jihong

[permalink] [raw]
Subject: [PATCH 0/2] perf/core: deliver PERF_RECORD_COMM and PERF_RECORD_MMAP side-band events to all online cpus

Tasks are migrated between cores due to scheduling.
If perf samples specified CPUs, PERF_RECORD_COMM and PERF_RECORD_MMAP
events need to be delivered to all cores to avoid the problem that the comm
and symbols cannot be parsed after a task migrates to the target core.

The specific scenarios are as follows:

CPU0 CPU1
perf record -C 0 start
taskA starts to be created and executed
-> PERF_RECORD_COMM and PERF_RECORD_MMAP
events only deliver to CPU1
......
|
migrate to CPU0
|
Running on CPU0 <----------/
...

perf record -C 0 stop

Now perf samples the PC of taskA. However, perf does not record the
PERF_RECORD_COMM and PERF_RECORD_MMAP events of taskA.
Therefore, the comm and symbols of taskA cannot be parsed.

Yang Jihong (2):
perf/core: perf_iterate_sb_cpu() supports to receive side-band events
for all online cpus
perf/core: deliver PERF_RECORD_COMM and PERF_RECORD_MMAP side-band
events to all online cpus

kernel/events/core.c | 58 +++++++++++++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 17 deletions(-)

--
2.30.GIT



2023-06-20 16:17:15

by Yang Jihong

[permalink] [raw]
Subject: [PATCH 1/2] perf/core: perf_iterate_sb_cpu() supports to receive side-band events for all online cpus

Add a bool parameter `system_wide` to perf_iterate_sb_cpu() so that
side-band events can be delivered to events on all online CPUs.
No functional change.

Signed-off-by: Yang Jihong <[email protected]>
---
kernel/events/core.c | 58 +++++++++++++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index db016e418931..66dbca1ba577 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7886,9 +7886,9 @@ perf_iterate_ctx(struct perf_event_context *ctx,
}
}

-static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
+static void perf_iterate_sb_pel(perf_iterate_f output, void *data,
+ struct pmu_event_list *pel, bool system_wide)
{
- struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
struct perf_event *event;

list_for_each_entry_rcu(event, &pel->list, sb_list) {
@@ -7902,12 +7902,30 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)

if (event->state < PERF_EVENT_STATE_INACTIVE)
continue;
- if (!event_filter_match(event))
+ if (!system_wide && !event_filter_match(event))
continue;
output(event, data);
}
}

+static void perf_iterate_sb_cpu(perf_iterate_f output, void *data,
+ bool system_wide)
+{
+ unsigned int i;
+
+ if (system_wide) {
+ for_each_online_cpu(i) {
+ perf_iterate_sb_pel(output, data,
+ per_cpu_ptr(&pmu_sb_events, i),
+ system_wide);
+ }
+ } else {
+ perf_iterate_sb_pel(output, data,
+ this_cpu_ptr(&pmu_sb_events),
+ system_wide);
+ }
+}
+
/*
* Iterate all events that need to receive side-band events.
*
@@ -7916,7 +7934,8 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
*/
static void
perf_iterate_sb(perf_iterate_f output, void *data,
- struct perf_event_context *task_ctx)
+ struct perf_event_context *task_ctx,
+ bool system_wide)
{
struct perf_event_context *ctx;

@@ -7933,7 +7952,7 @@ perf_iterate_sb(perf_iterate_f output, void *data,
goto done;
}

- perf_iterate_sb_cpu(output, data);
+ perf_iterate_sb_cpu(output, data, system_wide);

ctx = rcu_dereference(current->perf_event_ctxp);
if (ctx)
@@ -8174,8 +8193,9 @@ static void perf_event_task(struct task_struct *task,
};

perf_iterate_sb(perf_event_task_output,
- &task_event,
- task_ctx);
+ &task_event,
+ task_ctx,
+ false);
}

void perf_event_fork(struct task_struct *task)
@@ -8254,8 +8274,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;

perf_iterate_sb(perf_event_comm_output,
- comm_event,
- NULL);
+ comm_event,
+ NULL,
+ false);
}

void perf_event_comm(struct task_struct *task, bool exec)
@@ -8410,7 +8431,8 @@ void perf_event_namespaces(struct task_struct *task)

perf_iterate_sb(perf_event_namespaces_output,
&namespaces_event,
- NULL);
+ NULL,
+ false);
}

/*
@@ -8505,7 +8527,8 @@ static void perf_event_cgroup(struct cgroup *cgrp)

perf_iterate_sb(perf_event_cgroup_output,
&cgroup_event,
- NULL);
+ NULL,
+ false);

kfree(pathname);
}
@@ -8730,8 +8753,9 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);

perf_iterate_sb(perf_event_mmap_output,
- mmap_event,
- NULL);
+ mmap_event,
+ NULL,
+ false);

kfree(buf);
}
@@ -9020,7 +9044,7 @@ static void perf_event_switch(struct task_struct *task,
PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
}

- perf_iterate_sb(perf_event_switch_output, &switch_event, NULL);
+ perf_iterate_sb(perf_event_switch_output, &switch_event, NULL, false);
}

/*
@@ -9149,7 +9173,7 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
},
};

- perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
+ perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL, false);
return;
err:
WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
@@ -9261,7 +9285,7 @@ void perf_event_bpf_event(struct bpf_prog *prog,
BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));

memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
- perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
+ perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL, false);
}

struct perf_text_poke_event {
@@ -9345,7 +9369,7 @@ void perf_event_text_poke(const void *addr, const void *old_bytes,
},
};

- perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
+ perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL, false);
}

void perf_event_itrace_started(struct perf_event *event)
--
2.30.GIT


2023-06-20 16:20:17

by Yang Jihong

[permalink] [raw]
Subject: [PATCH 2/2] perf/core: deliver PERF_RECORD_COMM and PERF_RECORD_MMAP side-band events to all online cpus

Tasks are migrated between cores due to scheduling.
If perf samples specified CPUs, PERF_RECORD_COMM and PERF_RECORD_MMAP
events need to be delivered to all cores to avoid the problem that the comm
and symbols cannot be parsed after a task migrates to the target core.

The specific scenarios are as follows:

CPU0 CPU1
perf record -C 0 start
taskA starts to be created and executed
-> PERF_RECORD_COMM and PERF_RECORD_MMAP
events only deliver to CPU1
......
|
migrate to CPU0
|
Running on CPU0 <----------/
...

perf record -C 0 stop

Now perf samples the PC of taskA. However, perf does not record the
PERF_RECORD_COMM and PERF_RECORD_MMAP events of taskA.
Therefore, the comm and symbols of taskA cannot be parsed.

Signed-off-by: Yang Jihong <[email protected]>
---
kernel/events/core.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 66dbca1ba577..a014b2063af1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8276,7 +8276,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
perf_iterate_sb(perf_event_comm_output,
comm_event,
NULL,
- false);
+ true);
}

void perf_event_comm(struct task_struct *task, bool exec)
@@ -8755,7 +8755,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
perf_iterate_sb(perf_event_mmap_output,
mmap_event,
NULL,
- false);
+ true);

kfree(buf);
}
--
2.30.GIT


2023-06-20 18:56:51

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH 0/2] perf/core: deliver PERF_RECORD_COMM and PERF_RECORD_MMAP side-band events to all online cpus

On 20/06/23 18:55, Yang Jihong wrote:
> Tasks are migrated between cores due to scheduling.
> If perf samples specified CPUs, PERF_RECORD_COMM and PERF_RECORD_MMAP
> events need be deliver to all cores to avoids the problem that the comm
> and symbols cannot be parsed due to task migration to the target core.
>
> The specific scenarios are as follows:
>
> CPU0 CPU1
> perf record -C 0 start
> taskA starts to be created and executed
> -> PERF_RECORD_COMM and PERF_RECORD_MMAP
> events only deliver to CPU1
> ......
> |
> migrate to CPU0
> |
> Running on CPU0 <----------/
> ...
>
> perf record -C 0 stop
>
> Now perf samples the PC of taskA. However, perf does not record the
> PERF_RECORD_COMM and PERF_RECORD_COMM events of taskA.
> Therefore, the comm and symbols of taskA cannot be parsed.

perf record deals with this for PERF_RECORD_TEXT_POKE, by opening
the corresponding "dummy" event on each CPU irrespective of whether
the main events are on selected CPUs. Refer
evlist__add_dummy_on_all_cpus().

So this could be handled by perf record.

>
> Yang Jihong (2):
> perf/core: perf_iterate_sb_cpu() supports to receive side-band events
> for all oneline cpus
> perf/core: deliver PERF_RECORD_COMM and PERF_RECORD_MMAP side-band
> events to all online cpus
>
> kernel/events/core.c | 58 +++++++++++++++++++++++++++++++-------------
> 1 file changed, 41 insertions(+), 17 deletions(-)
>


2023-06-23 06:53:45

by Yang Jihong

[permalink] [raw]
Subject: Re: [PATCH 0/2] perf/core: deliver PERF_RECORD_COMM and PERF_RECORD_MMAP side-band events to all online cpus

Hello,

On 2023/6/21 2:49, Adrian Hunter wrote:
> On 20/06/23 18:55, Yang Jihong wrote:
>> Tasks are migrated between cores due to scheduling.
>> If perf samples specified CPUs, PERF_RECORD_COMM and PERF_RECORD_MMAP
>> events need be deliver to all cores to avoids the problem that the comm
>> and symbols cannot be parsed due to task migration to the target core.
>>
>> The specific scenarios are as follows:
>>
>> CPU0 CPU1
>> perf record -C 0 start
>> taskA starts to be created and executed
>> -> PERF_RECORD_COMM and PERF_RECORD_MMAP
>> events only deliver to CPU1
>> ......
>> |
>> migrate to CPU0
>> |
>> Running on CPU0 <----------/
>> ...
>>
>> perf record -C 0 stop
>>
>> Now perf samples the PC of taskA. However, perf does not record the
>> PERF_RECORD_COMM and PERF_RECORD_COMM events of taskA.
>> Therefore, the comm and symbols of taskA cannot be parsed.
>
> perf record deals with this for PERF_RECORD_TEXT_POKE, by opening
> the corresponding "dummy" event on each CPU irrespective of whether
> the main events are on selected CPUs. Refer
> evlist__add_dummy_on_all_cpus().
>
> So this could be handled by perf record.

Okay, I'll take a look at it and use this solution to deal with it.

Thanks,
Yang