2021-09-07 16:43:57

by Adrian Hunter

[permalink] [raw]
Subject: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

PEBS-via-PT records contain a mask of applicable counters. To identify
which event belongs to which counter, a side-band event is needed. Until
now, there has been no side-band event, and consequently users were limited
to using a single event.

Add such a side-band event. Note the event is optimised to output only
when the counter index changes for an event. That works only so long as
all PEBS-via-PT events are scheduled together, which they are for a
recording session because they are in a single group.

Also no attribute bit is used to select the new event, so a new
kernel is not compatible with older perf tools. The assumption
being that PEBS-via-PT is sufficiently esoteric that users will not
be troubled by this.

Signed-off-by: Adrian Hunter <[email protected]>
---

Changes in V3:
Do not set assign callback unless x86_pmu.intel_cap.pebs_output_pt_available

Changes in V2:
Use callback from x86_assign_hw_event


arch/x86/events/core.c | 6 ++++++
arch/x86/events/intel/core.c | 16 ++++++++++++++++
arch/x86/events/perf_event.h | 1 +
include/linux/perf_event.h | 1 +
include/uapi/linux/perf_event.h | 15 +++++++++++++++
kernel/events/core.c | 30 ++++++++++++++++++++++++++++++
6 files changed, 69 insertions(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 2a57dbed4894..be33423e9762 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);

+DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
+
DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];

+ static_call_cond(x86_pmu_assign)(event, idx);
+
switch (hwc->idx) {
case INTEL_PMC_IDX_FIXED_BTS:
case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_enable, x86_pmu.enable);
static_call_update(x86_pmu_disable, x86_pmu.disable);

+ static_call_update(x86_pmu_assign, x86_pmu.assign);
+
static_call_update(x86_pmu_add, x86_pmu.add);
static_call_update(x86_pmu_del, x86_pmu.del);
static_call_update(x86_pmu_read, x86_pmu.read);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87be6d0..a555e7c2dce9 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2402,6 +2402,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
intel_pmu_pebs_disable(event);
}

+static void intel_pmu_assign_event(struct perf_event *event, int idx)
+{
+ if (is_pebs_pt(event))
+ perf_report_aux_output_id(event, idx);
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
if (needs_branch_stack(event))
@@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}

+static void intel_aux_output_init(void)
+{
+ /* Refer also intel_pmu_aux_output_match() */
+ if (x86_pmu.intel_cap.pebs_output_pt_available)
+ x86_pmu.assign = intel_pmu_assign_event;
+}
+
static int intel_pmu_aux_output_match(struct perf_event *event)
{
+ /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
if (!x86_pmu.intel_cap.pebs_output_pt_available)
return 0;

@@ -6301,6 +6315,8 @@ __init int intel_pmu_init(void)
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);

+ intel_aux_output_init();
+
return 0;
}

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c97b5e..76436a55d9ba 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -726,6 +726,7 @@ struct x86_pmu {
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
+ void (*assign)(struct perf_event *event, int idx);
void (*add)(struct perf_event *);
void (*del)(struct perf_event *);
void (*read)(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe156a8170aa..6f4a15660651 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1398,6 +1398,7 @@ perf_event_addr_filters(struct perf_event *event)
}

extern void perf_event_addr_filters_sync(struct perf_event *event);
+extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);

extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_sample_data *data,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a15645..c89535de1ec8 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1141,6 +1141,21 @@ enum perf_event_type {
*/
PERF_RECORD_TEXT_POKE = 20,

+ /*
+ * Data written to the AUX area by hardware due to aux_output, may need
+ * to be matched to the event by an architecture-specific hardware ID.
+ * This records the hardware ID, but requires sample_id to provide the
+ * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
+ * records from multiple events.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 hw_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+
PERF_RECORD_MAX, /* non-ABI */
};

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 744e8726c5b2..79241b6e57c7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9073,6 +9073,36 @@ static void perf_log_itrace_start(struct perf_event *event)
perf_output_end(&handle);
}

+void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
+{
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ struct perf_aux_event {
+ struct perf_event_header header;
+ u64 hw_id;
+ } rec;
+ int ret;
+
+ if (event->parent)
+ event = event->parent;
+
+ rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
+ rec.header.misc = 0;
+ rec.header.size = sizeof(rec);
+ rec.hw_id = hw_id;
+
+ perf_event_header__init_id(&rec.header, &sample, event);
+ ret = perf_output_begin(&handle, &sample, event, rec.header.size);
+
+ if (ret)
+ return;
+
+ perf_output_put(&handle, rec);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
static int
__perf_event_account_interrupt(struct perf_event *event, int throttle)
{
--
2.17.1


2021-09-07 17:52:44

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index



On 9/7/2021 12:39 PM, Adrian Hunter wrote:
> PEBS-via-PT records contain a mask of applicable counters. To identify
> which event belongs to which counter, a side-band event is needed. Until
> now, there has been no side-band event, and consequently users were limited
> to using a single event.
>
> Add such a side-band event. Note the event is optimised to output only
> when the counter index changes for an event. That works only so long as
> all PEBS-via-PT events are scheduled together, which they are for a
> recording session because they are in a single group.
>
> Also no attribute bit is used to select the new event, so a new
> kernel is not compatible with older perf tools. The assumption
> being that PEBS-via-PT is sufficiently esoteric that users will not
> be troubled by this.
>
> Signed-off-by: Adrian Hunter <[email protected]>
> ---
>
> Changes in V3:
> Do not set assign callback unless x86_pmu.intel_cap.pebs_output_pt_available
>
> Changes in V2:
> Use callback from x86_assign_hw_event
>
>
> arch/x86/events/core.c | 6 ++++++
> arch/x86/events/intel/core.c | 16 ++++++++++++++++
> arch/x86/events/perf_event.h | 1 +
> include/linux/perf_event.h | 1 +
> include/uapi/linux/perf_event.h | 15 +++++++++++++++
> kernel/events/core.c | 30 ++++++++++++++++++++++++++++++
> 6 files changed, 69 insertions(+)
>
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 2a57dbed4894..be33423e9762 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
> DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
> DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
>
> +DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
> +
> DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
> DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
> DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
> @@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
> hwc->last_cpu = smp_processor_id();
> hwc->last_tag = ++cpuc->tags[i];
>
> + static_call_cond(x86_pmu_assign)(event, idx);
> +
> switch (hwc->idx) {
> case INTEL_PMC_IDX_FIXED_BTS:
> case INTEL_PMC_IDX_FIXED_VLBR:
> @@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
> static_call_update(x86_pmu_enable, x86_pmu.enable);
> static_call_update(x86_pmu_disable, x86_pmu.disable);
>
> + static_call_update(x86_pmu_assign, x86_pmu.assign);
> +
> static_call_update(x86_pmu_add, x86_pmu.add);
> static_call_update(x86_pmu_del, x86_pmu.del);
> static_call_update(x86_pmu_read, x86_pmu.read);
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 7011e87be6d0..a555e7c2dce9 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2402,6 +2402,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
> intel_pmu_pebs_disable(event);
> }
>
> +static void intel_pmu_assign_event(struct perf_event *event, int idx)
> +{
> + if (is_pebs_pt(event))
> + perf_report_aux_output_id(event, idx);
> +}
> +
> static void intel_pmu_del_event(struct perf_event *event)
> {
> if (needs_branch_stack(event))
> @@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
> return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
> }
>
> +static void intel_aux_output_init(void)
> +{
> + /* Refer also intel_pmu_aux_output_match() */
> + if (x86_pmu.intel_cap.pebs_output_pt_available)
> + x86_pmu.assign = intel_pmu_assign_event;
> +}

For a hybrid machine, x86_pmu.intel_cap.pebs_output_pt_available is
always cleared. We probably need the PMU specific
pmu->intel_cap.pebs_output_pt_available here.

> +
> static int intel_pmu_aux_output_match(struct perf_event *event)
> {
> + /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
> if (!x86_pmu.intel_cap.pebs_output_pt_available)
> return 0;
>

For a hybrid machine, this always return 0. I think we need to fix it first?

Thanks,
Kan


> @@ -6301,6 +6315,8 @@ __init int intel_pmu_init(void)
> if (is_hybrid())
> intel_pmu_check_hybrid_pmus((u64)fixed_mask);
>
> + intel_aux_output_init();
> +
> return 0;
> }
>
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index e3ac05c97b5e..76436a55d9ba 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -726,6 +726,7 @@ struct x86_pmu {
> void (*enable_all)(int added);
> void (*enable)(struct perf_event *);
> void (*disable)(struct perf_event *);
> + void (*assign)(struct perf_event *event, int idx);
> void (*add)(struct perf_event *);
> void (*del)(struct perf_event *);
> void (*read)(struct perf_event *event);
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index fe156a8170aa..6f4a15660651 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1398,6 +1398,7 @@ perf_event_addr_filters(struct perf_event *event)
> }
>
> extern void perf_event_addr_filters_sync(struct perf_event *event);
> +extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
>
> extern int perf_output_begin(struct perf_output_handle *handle,
> struct perf_sample_data *data,
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index f92880a15645..c89535de1ec8 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -1141,6 +1141,21 @@ enum perf_event_type {
> */
> PERF_RECORD_TEXT_POKE = 20,
>
> + /*
> + * Data written to the AUX area by hardware due to aux_output, may need
> + * to be matched to the event by an architecture-specific hardware ID.
> + * This records the hardware ID, but requires sample_id to provide the
> + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
> + * records from multiple events.
> + *
> + * struct {
> + * struct perf_event_header header;
> + * u64 hw_id;
> + * struct sample_id sample_id;
> + * };
> + */
> + PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
> +
> PERF_RECORD_MAX, /* non-ABI */
> };
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 744e8726c5b2..79241b6e57c7 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -9073,6 +9073,36 @@ static void perf_log_itrace_start(struct perf_event *event)
> perf_output_end(&handle);
> }
>
> +void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
> +{
> + struct perf_output_handle handle;
> + struct perf_sample_data sample;
> + struct perf_aux_event {
> + struct perf_event_header header;
> + u64 hw_id;
> + } rec;
> + int ret;
> +
> + if (event->parent)
> + event = event->parent;
> +
> + rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
> + rec.header.misc = 0;
> + rec.header.size = sizeof(rec);
> + rec.hw_id = hw_id;
> +
> + perf_event_header__init_id(&rec.header, &sample, event);
> + ret = perf_output_begin(&handle, &sample, event, rec.header.size);
> +
> + if (ret)
> + return;
> +
> + perf_output_put(&handle, rec);
> + perf_event__output_id_sample(event, &handle, &sample);
> +
> + perf_output_end(&handle);
> +}
> +
> static int
> __perf_event_account_interrupt(struct perf_event *event, int throttle)
> {
>

2021-09-10 16:06:21

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

On Tue, Sep 07, 2021 at 01:45:22PM -0400, Liang, Kan wrote:
> On 9/7/2021 12:39 PM, Adrian Hunter wrote:

> > @@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
> > return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
> > }
> > +static void intel_aux_output_init(void)
> > +{
> > + /* Refer also intel_pmu_aux_output_match() */
> > + if (x86_pmu.intel_cap.pebs_output_pt_available)
> > + x86_pmu.assign = intel_pmu_assign_event;
> > +}
>
> For a hybrid machine, x86_pmu.intel_cap.pebs_output_pt_available is always
> cleared. We probably need the PMU specific
> pmu->intel_cap.pebs_output_pt_available here.
>
> > +
> > static int intel_pmu_aux_output_match(struct perf_event *event)
> > {
> > + /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
> > if (!x86_pmu.intel_cap.pebs_output_pt_available)
> > return 0;
> >
>
> For a hybrid machine, this always return 0. I think we need to fix it first?

AFAICT the patch is correct for !hybrid, and the hybrid PT muck can then
also fix this up, right?

2021-09-10 16:30:48

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index



On 9/10/2021 12:04 PM, Peter Zijlstra wrote:
> On Tue, Sep 07, 2021 at 01:45:22PM -0400, Liang, Kan wrote:
>> On 9/7/2021 12:39 PM, Adrian Hunter wrote:
>
>>> @@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
>>> return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
>>> }
>>> +static void intel_aux_output_init(void)
>>> +{
>>> + /* Refer also intel_pmu_aux_output_match() */
>>> + if (x86_pmu.intel_cap.pebs_output_pt_available)
>>> + x86_pmu.assign = intel_pmu_assign_event;
>>> +}
>>
>> For a hybrid machine, x86_pmu.intel_cap.pebs_output_pt_available is always
>> cleared. We probably need the PMU specific
>> pmu->intel_cap.pebs_output_pt_available here.
>>
>>> +
>>> static int intel_pmu_aux_output_match(struct perf_event *event)
>>> {
>>> + /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
>>> if (!x86_pmu.intel_cap.pebs_output_pt_available)
>>> return 0;
>>>
>>
>> For a hybrid machine, this always return 0. I think we need to fix it first?
>
> AFAICT the patch is correct for !hybrid, and the hybrid PT muck can then
> also fix this up, right?
>

Yes, for !hybrid, the patch is good.

Since PEBS via PT is temporarily disabled for hybrid for now, the patch
set should not bring any issues with hybrid either.
The hybrid PT can be fixed separately.

Thanks,
Kan

2021-09-23 18:30:37

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

On 10/09/21 7:29 pm, Liang, Kan wrote:
>
>
> On 9/10/2021 12:04 PM, Peter Zijlstra wrote:
>> On Tue, Sep 07, 2021 at 01:45:22PM -0400, Liang, Kan wrote:
>>> On 9/7/2021 12:39 PM, Adrian Hunter wrote:
>>
>>>> @@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
>>>>        return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
>>>>    }
>>>> +static void intel_aux_output_init(void)
>>>> +{
>>>> +    /* Refer also intel_pmu_aux_output_match() */
>>>> +    if (x86_pmu.intel_cap.pebs_output_pt_available)
>>>> +        x86_pmu.assign = intel_pmu_assign_event;
>>>> +}
>>>
>>> For a hybrid machine, x86_pmu.intel_cap.pebs_output_pt_available is always
>>> cleared. We probably need the PMU specific
>>> pmu->intel_cap.pebs_output_pt_available here.
>>>
>>>> +
>>>>    static int intel_pmu_aux_output_match(struct perf_event *event)
>>>>    {
>>>> +    /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
>>>>        if (!x86_pmu.intel_cap.pebs_output_pt_available)
>>>>            return 0;
>>>>
>>>
>>> For a hybrid machine, this always return 0. I think we need to fix it first?
>>
>> AFAICT the patch is correct for !hybrid, and the hybrid PT muck can then
>> also fix this up, right?
>>
>
> Yes, for !hybrid, the patch is good.
>
> Since PEBS via PT is temporarily disabled for hybrid for now, the patch set should not bring any issues with hybrid either.
> The hybrid PT can be fixed separately.

I don't have much time to look at the hybrid case right now.

Would it be OK to go ahead with these patches?

2021-09-23 19:26:48

by Alexander Shishkin

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

Adrian Hunter <[email protected]> writes:

> On 10/09/21 7:29 pm, Liang, Kan wrote:
>>
>>
>> On 9/10/2021 12:04 PM, Peter Zijlstra wrote:
>>> On Tue, Sep 07, 2021 at 01:45:22PM -0400, Liang, Kan wrote:
>>>> On 9/7/2021 12:39 PM, Adrian Hunter wrote:
>>>
>>>>> @@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
>>>>>        return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
>>>>>    }
>>>>> +static void intel_aux_output_init(void)
>>>>> +{
>>>>> +    /* Refer also intel_pmu_aux_output_match() */
>>>>> +    if (x86_pmu.intel_cap.pebs_output_pt_available)
>>>>> +        x86_pmu.assign = intel_pmu_assign_event;
>>>>> +}
>>>>
>>>> For a hybrid machine, x86_pmu.intel_cap.pebs_output_pt_available is always
>>>> cleared. We probably need the PMU specific
>>>> pmu->intel_cap.pebs_output_pt_available here.
>>>>
>>>>> +
>>>>>    static int intel_pmu_aux_output_match(struct perf_event *event)
>>>>>    {
>>>>> +    /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
>>>>>        if (!x86_pmu.intel_cap.pebs_output_pt_available)
>>>>>            return 0;
>>>>>
>>>>
>>>> For a hybrid machine, this always return 0. I think we need to fix it first?
>>>
>>> AFAICT the patch is correct for !hybrid, and the hybrid PT muck can then
>>> also fix this up, right?
>>>
>>
>> Yes, for !hybrid, the patch is good.
>>
>> Since PEBS via PT is temporarily disabled for hybrid for now, the patch set should not bring any issues with hybrid either.
>> The hybrid PT can be fixed separately.
>
> I don't have much time to look at the hybrid case right now.
>
> Would it be OK to go ahead with these patches?

I'll deal with the PEBS-via-PT on hybrid. As it stands right now, this
patchset is good.

Regards,
--
Alex

2021-10-13 09:14:23

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

On 23/09/2021 22:22, Alexander Shishkin wrote:
> Adrian Hunter <[email protected]> writes:
>
>> On 10/09/21 7:29 pm, Liang, Kan wrote:
>>>
>>>
>>> On 9/10/2021 12:04 PM, Peter Zijlstra wrote:
>>>> On Tue, Sep 07, 2021 at 01:45:22PM -0400, Liang, Kan wrote:
>>>>> On 9/7/2021 12:39 PM, Adrian Hunter wrote:
>>>>
>>>>>> @@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
>>>>>>        return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
>>>>>>    }
>>>>>> +static void intel_aux_output_init(void)
>>>>>> +{
>>>>>> +    /* Refer also intel_pmu_aux_output_match() */
>>>>>> +    if (x86_pmu.intel_cap.pebs_output_pt_available)
>>>>>> +        x86_pmu.assign = intel_pmu_assign_event;
>>>>>> +}
>>>>>
>>>>> For a hybrid machine, x86_pmu.intel_cap.pebs_output_pt_available is always
>>>>> cleared. We probably need the PMU specific
>>>>> pmu->intel_cap.pebs_output_pt_available here.
>>>>>
>>>>>> +
>>>>>>    static int intel_pmu_aux_output_match(struct perf_event *event)
>>>>>>    {
>>>>>> +    /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
>>>>>>        if (!x86_pmu.intel_cap.pebs_output_pt_available)
>>>>>>            return 0;
>>>>>>
>>>>>
>>>>> For a hybrid machine, this always return 0. I think we need to fix it first?
>>>>
>>>> AFAICT the patch is correct for !hybrid, and the hybrid PT muck can then
>>>> also fix this up, right?
>>>>
>>>
>>> Yes, for !hybrid, the patch is good.
>>>
>>> Since PEBS via PT is temporarily disabled for hybrid for now, the patch set should not bring any issues with hybrid either.
>>> The hybrid PT can be fixed separately.
>>
>> I don't have much time to look at the hybrid case right now.
>>
>> Would it be OK to go ahead with these patches?
>
> I'll deal with the PEBS-via-PT on hybrid. As it stands right now, this
> patchset is good.

Will anyone take these patches? Perhaps Arnaldo if no one objects?
The patches still seem to apply cleanly.

2021-10-13 14:20:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

On Wed, Oct 13, 2021 at 12:12:48PM +0300, Adrian Hunter wrote:

> Will anyone take these patches? Perhaps Arnaldo if no one objects?
> The patches still seem to apply cleanly.

I've picked up the first. But if acme wants to route the whole lot:

Acked-by: Peter Zijlstra (Intel) <[email protected]>

2021-10-13 15:37:22

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH V3 1/3] perf/x86: Add new event for AUX output counter index

Em Wed, Oct 13, 2021 at 04:17:49PM +0200, Peter Zijlstra escreveu:
> On Wed, Oct 13, 2021 at 12:12:48PM +0300, Adrian Hunter wrote:
>
> > Will anyone take these patches? Perhaps Arnaldo if no one objects?
> > The patches still seem to apply cleanly.
>
> I've picked up the first. But if acme wants to route the whole lot:
>
> Acked-by: Peter Zijlstra (Intel) <[email protected]>


I'll pick the rest, thanks.

- Arnaldo

2021-10-15 17:44:22

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: perf/core] perf/x86: Add new event for AUX output counter index

The following commit has been merged into the perf/core branch of tip:

Commit-ID: 8b8ff8cc3b8155c18162e8b1f70e1230db176862
Gitweb: https://git.kernel.org/tip/8b8ff8cc3b8155c18162e8b1f70e1230db176862
Author: Adrian Hunter <[email protected]>
AuthorDate: Tue, 07 Sep 2021 19:39:01 +03:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 15 Oct 2021 11:25:31 +02:00

perf/x86: Add new event for AUX output counter index

PEBS-via-PT records contain a mask of applicable counters. To identify
which event belongs to which counter, a side-band event is needed. Until
now, there has been no side-band event, and consequently users were limited
to using a single event.

Add such a side-band event. Note the event is optimised to output only
when the counter index changes for an event. That works only so long as
all PEBS-via-PT events are scheduled together, which they are for a
recording session because they are in a single group.

Also no attribute bit is used to select the new event, so a new
kernel is not compatible with older perf tools. The assumption
being that PEBS-via-PT is sufficiently esoteric that users will not
be troubled by this.

Signed-off-by: Adrian Hunter <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/events/core.c | 6 ++++++
arch/x86/events/intel/core.c | 16 ++++++++++++++++
arch/x86/events/perf_event.h | 1 +
include/linux/perf_event.h | 1 +
include/uapi/linux/perf_event.h | 15 +++++++++++++++
kernel/events/core.c | 30 ++++++++++++++++++++++++++++++
6 files changed, 69 insertions(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 2a57dbe..be33423 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);

+DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
+
DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];

+ static_call_cond(x86_pmu_assign)(event, idx);
+
switch (hwc->idx) {
case INTEL_PMC_IDX_FIXED_BTS:
case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_enable, x86_pmu.enable);
static_call_update(x86_pmu_disable, x86_pmu.disable);

+ static_call_update(x86_pmu_assign, x86_pmu.assign);
+
static_call_update(x86_pmu_add, x86_pmu.add);
static_call_update(x86_pmu_del, x86_pmu.del);
static_call_update(x86_pmu_read, x86_pmu.read);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87..a555e7c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2402,6 +2402,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
intel_pmu_pebs_disable(event);
}

+static void intel_pmu_assign_event(struct perf_event *event, int idx)
+{
+ if (is_pebs_pt(event))
+ perf_report_aux_output_id(event, idx);
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
if (needs_branch_stack(event))
@@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}

+static void intel_aux_output_init(void)
+{
+ /* Refer also intel_pmu_aux_output_match() */
+ if (x86_pmu.intel_cap.pebs_output_pt_available)
+ x86_pmu.assign = intel_pmu_assign_event;
+}
+
static int intel_pmu_aux_output_match(struct perf_event *event)
{
+ /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
if (!x86_pmu.intel_cap.pebs_output_pt_available)
return 0;

@@ -6301,6 +6315,8 @@ __init int intel_pmu_init(void)
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);

+ intel_aux_output_init();
+
return 0;
}

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c..76436a5 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -726,6 +726,7 @@ struct x86_pmu {
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
+ void (*assign)(struct perf_event *event, int idx);
void (*add)(struct perf_event *);
void (*del)(struct perf_event *);
void (*read)(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2d510ad..126b3a3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1397,6 +1397,7 @@ perf_event_addr_filters(struct perf_event *event)
}

extern void perf_event_addr_filters_sync(struct perf_event *event);
+extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);

extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_sample_data *data,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a..c89535d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1141,6 +1141,21 @@ enum perf_event_type {
*/
PERF_RECORD_TEXT_POKE = 20,

+ /*
+ * Data written to the AUX area by hardware due to aux_output, may need
+ * to be matched to the event by an architecture-specific hardware ID.
+ * This records the hardware ID, but requires sample_id to provide the
+ * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
+ * records from multiple events.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 hw_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+
PERF_RECORD_MAX, /* non-ABI */
};

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cb1f9b..0e90a50 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9062,6 +9062,36 @@ static void perf_log_itrace_start(struct perf_event *event)
perf_output_end(&handle);
}

+void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
+{
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ struct perf_aux_event {
+ struct perf_event_header header;
+ u64 hw_id;
+ } rec;
+ int ret;
+
+ if (event->parent)
+ event = event->parent;
+
+ rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
+ rec.header.misc = 0;
+ rec.header.size = sizeof(rec);
+ rec.hw_id = hw_id;
+
+ perf_event_header__init_id(&rec.header, &sample, event);
+ ret = perf_output_begin(&handle, &sample, event, rec.header.size);
+
+ if (ret)
+ return;
+
+ perf_output_put(&handle, rec);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
static int
__perf_event_account_interrupt(struct perf_event *event, int throttle)
{