2023-05-22 11:57:45

by Liang, Kan

[permalink] [raw]
Subject: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

From: Kan Liang <[email protected]>

The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
both have Crestmont core. From the core PMU's perspective, they are
similar to the e-core of MTL. The only difference is the LBR event
logging feature, which will be implemented in the following patches.

Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Reviewed-by: Andi Kleen <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---

No changes since V1.

arch/x86/events/intel/core.c | 52 +++++++++++++++++++++++++++++++++++-
arch/x86/events/intel/ds.c | 9 +++++--
arch/x86/events/perf_event.h | 2 ++
3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a3fb996a86a1..ba2a971e6b8a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2119,6 +2119,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};

+EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_tnt),
+ EVENT_PTR(td_retiring_cmt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_tnt),
+ NULL
+};
+
static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@@ -4830,6 +4841,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");

PMU_FORMAT_ATTR(frontend, "config1:0-23");

+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -4860,6 +4873,13 @@ static struct attribute *slm_format_attr[] = {
NULL
};

+static struct attribute *cmt_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ &format_attr_snoop_rsp.attr,
+ NULL
+};
+
static struct attribute *skl_format_attr[] = {
&format_attr_frontend.attr,
NULL,
@@ -5630,7 +5650,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL
};

-PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);

static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@@ -6178,6 +6197,37 @@ __init int intel_pmu_init(void)
name = "gracemont";
break;

+ case INTEL_FAM6_GRANDRIDGE:
+ case INTEL_FAM6_SIERRAFOREST_X:
+ x86_pmu.mid_ack = true;
+ memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_cmt_extra_regs;
+
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.pebs_block = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = mtl_latency_data_small;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ x86_pmu.limit_period = spr_limit_period;
+ td_attr = cmt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Crestmont events, ");
+ name = "crestmont";
+ break;
+
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a2e566e53076..608e220e46aa 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
__intel_pmu_pebs_data_source_grt(data_source);
}

-static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
{
data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)

data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
- intel_pmu_pebs_data_source_cmt(data_source);
+ __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
+void __init intel_pmu_pebs_data_source_cmt(void)
+{
+ __intel_pmu_pebs_data_source_cmt(pebs_data_source);
}

static u64 precise_store_data(u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index d6de4487348c..c8ba2be7585d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);

void intel_pmu_pebs_data_source_mtl(void);

+void intel_pmu_pebs_data_source_cmt(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);
--
2.35.1



2023-05-22 20:51:38

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Mon, May 22, 2023 at 04:30:35AM -0700, [email protected] wrote:
> From: Kan Liang <[email protected]>
>
> The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
> both have Crestmont core. From the core PMU's perspective, they are
> similar to the e-core of MTL. The only difference is the LBR event
> logging feature, which will be implemented in the following patches.
>
> Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Moo... Tony, did you sneak product names instead of uarch names in the
intel-family thing again?

That is; I'm thinking we want the below, no?

---

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index b3af2d45bbbb..0e804d189e63 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -116,16 +116,16 @@
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_N 0xBE

-#define INTEL_FAM6_RAPTORLAKE 0xB7
+#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
#define INTEL_FAM6_RAPTORLAKE_P 0xBA
#define INTEL_FAM6_RAPTORLAKE_S 0xBF

-#define INTEL_FAM6_METEORLAKE 0xAC
+#define INTEL_FAM6_METEORLAKE 0xAC /* Redwood Cove / Crestmont */
#define INTEL_FAM6_METEORLAKE_L 0xAA

-#define INTEL_FAM6_LUNARLAKE_M 0xBD
+#define INTEL_FAM6_ARROWLAKE 0xC6 /* Lion Cove / Skymont */

-#define INTEL_FAM6_ARROWLAKE 0xC6
+#define INTEL_FAM6_LUNARLAKE_M 0xBD

/* "Small Core" Processors (Atom/E-Core) */

@@ -154,9 +154,8 @@
#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */

-#define INTEL_FAM6_SIERRAFOREST_X 0xAF
-
-#define INTEL_FAM6_GRANDRIDGE 0xB6
+#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
+#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */

/* Xeon Phi */


2023-05-22 20:55:29

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

> Moo... Tony, did you sneak product names instead of uarch names in the
> intel-family thing again?

The difficult part is that I've finally got Intel to release product names reasonable
time ahead of launch. But details like which core architecture is used by each
lags behind.

But I think you just announced the Crestmont core.

-Tony


2023-05-22 20:58:14

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Mon, May 22, 2023 at 08:42:14PM +0000, Luck, Tony wrote:
> > Moo... Tony, did you sneak product names instead of uarch names in the
> > intel-family thing again?
>
> The difficult part is that I've finally got Intel to release product names reasonable
> time ahead of launch. But details like which core architecture is used by each
> lags behind.
>
> But I think you just announced the Crestmont core.

Crestmont was in Kan's original Changelog, also:

https://en.wikichip.org/wiki/intel/microarchitectures/meteor_lake

has: Core Names Redwood Cove, Crestmont

And:

https://wccftech.com/intel-next-gen-arrow-lake-lunar-lake-cpus-get-preliminary-support-in-aida64/

has a giant list of names too.


Basically, I never can remember these things and I use Google because I
can't be bothered to learn how the intraweb muck works.



2023-06-06 12:59:38

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

Hi Peter,

On 2023-05-22 7:30 a.m., [email protected] wrote:
> From: Kan Liang <[email protected]>
>
> The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
> both have Crestmont core. From the core PMU's perspective, they are
> similar to the e-core of MTL. The only difference is the LBR event
> logging feature, which will be implemented in the following patches.
>
> Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
>
> Reviewed-by: Andi Kleen <[email protected]>
> Signed-off-by: Kan Liang <[email protected]>
> ---
>


Gentle ping.

Do you have any comments for the patch set?

The patch set based on the perf/core branch which doesn't
include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
update on AMD BRS").
https://lore.kernel.org/lkml/[email protected]/

Should I rebase it on the perf/urgent and send the V3?


Thanks,
Kan


> No changes since V1.
>
> arch/x86/events/intel/core.c | 52 +++++++++++++++++++++++++++++++++++-
> arch/x86/events/intel/ds.c | 9 +++++--
> arch/x86/events/perf_event.h | 2 ++
> 3 files changed, 60 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index a3fb996a86a1..ba2a971e6b8a 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2119,6 +2119,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
> EVENT_EXTRA_END
> };
>
> +EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0");
> +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0");
> +
> +static struct attribute *cmt_events_attrs[] = {
> + EVENT_PTR(td_fe_bound_tnt),
> + EVENT_PTR(td_retiring_cmt),
> + EVENT_PTR(td_bad_spec_cmt),
> + EVENT_PTR(td_be_bound_tnt),
> + NULL
> +};
> +
> static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
> /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
> INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
> @@ -4830,6 +4841,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
>
> PMU_FORMAT_ATTR(frontend, "config1:0-23");
>
> +PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
> +
> static struct attribute *intel_arch3_formats_attr[] = {
> &format_attr_event.attr,
> &format_attr_umask.attr,
> @@ -4860,6 +4873,13 @@ static struct attribute *slm_format_attr[] = {
> NULL
> };
>
> +static struct attribute *cmt_format_attr[] = {
> + &format_attr_offcore_rsp.attr,
> + &format_attr_ldlat.attr,
> + &format_attr_snoop_rsp.attr,
> + NULL
> +};
> +
> static struct attribute *skl_format_attr[] = {
> &format_attr_frontend.attr,
> NULL,
> @@ -5630,7 +5650,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
> NULL
> };
>
> -PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
> FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
>
> static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
> @@ -6178,6 +6197,37 @@ __init int intel_pmu_init(void)
> name = "gracemont";
> break;
>
> + case INTEL_FAM6_GRANDRIDGE:
> + case INTEL_FAM6_SIERRAFOREST_X:
> + x86_pmu.mid_ack = true;
> + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
> + sizeof(hw_cache_event_ids));
> + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
> + sizeof(hw_cache_extra_regs));
> + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
> +
> + x86_pmu.event_constraints = intel_slm_event_constraints;
> + x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
> + x86_pmu.extra_regs = intel_cmt_extra_regs;
> +
> + x86_pmu.pebs_aliases = NULL;
> + x86_pmu.pebs_prec_dist = true;
> + x86_pmu.lbr_pt_coexist = true;
> + x86_pmu.pebs_block = true;
> + x86_pmu.flags |= PMU_FL_HAS_RSP_1;
> + x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
> +
> + intel_pmu_pebs_data_source_cmt();
> + x86_pmu.pebs_latency_data = mtl_latency_data_small;
> + x86_pmu.get_event_constraints = cmt_get_event_constraints;
> + x86_pmu.limit_period = spr_limit_period;
> + td_attr = cmt_events_attrs;
> + mem_attr = grt_mem_attrs;
> + extra_attr = cmt_format_attr;
> + pr_cont("Crestmont events, ");
> + name = "crestmont";
> + break;
> +
> case INTEL_FAM6_WESTMERE:
> case INTEL_FAM6_WESTMERE_EP:
> case INTEL_FAM6_WESTMERE_EX:
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index a2e566e53076..608e220e46aa 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
> __intel_pmu_pebs_data_source_grt(data_source);
> }
>
> -static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
> +static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
> {
> data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
> data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
> @@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
>
> data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
> memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
> - intel_pmu_pebs_data_source_cmt(data_source);
> + __intel_pmu_pebs_data_source_cmt(data_source);
> +}
> +
> +void __init intel_pmu_pebs_data_source_cmt(void)
> +{
> + __intel_pmu_pebs_data_source_cmt(pebs_data_source);
> }
>
> static u64 precise_store_data(u64 status)
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index d6de4487348c..c8ba2be7585d 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
>
> void intel_pmu_pebs_data_source_mtl(void);
>
> +void intel_pmu_pebs_data_source_cmt(void);
> +
> int intel_pmu_setup_lbr_filter(struct perf_event *event);
>
> void intel_pt_interrupt(void);

2023-06-06 14:08:02

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Tue, Jun 06, 2023 at 08:42:42AM -0400, Liang, Kan wrote:
> Hi Peter,
>
> On 2023-05-22 7:30 a.m., [email protected] wrote:
> > From: Kan Liang <[email protected]>
> >
> > The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
> > both have Crestmont core. From the core PMU's perspective, they are
> > similar to the e-core of MTL. The only difference is the LBR event
> > logging feature, which will be implemented in the following patches.
> >
> > Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
> >
> > Reviewed-by: Andi Kleen <[email protected]>
> > Signed-off-by: Kan Liang <[email protected]>
> > ---
> >
>
>
> Gentle ping.
>
> Do you have any comments for the patch set?
>
> The patch set based on the perf/core branch which doesn't
> include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
> update on AMD BRS").
> https://lore.kernel.org/lkml/[email protected]/
>
> Should I rebase it on the perf/urgent and send the V3?
>

I can pull urgent into perf/core, but:

> > + case INTEL_FAM6_GRANDRIDGE:
> > + case INTEL_FAM6_SIERRAFOREST_X:
^^^^^^^^^^^^^^^

Those are just plain wrong; please fix up the intel-family.h thing like
suggested earlier in this thread.

And Tony, please no more of that platform name nonsense.. we want uarch
names for a reason, so that enums like the above become something
sensible like:

case INTEL_FAM6_ATOM_CRESTMONT:
case INTEL_FAM6_ATOM_CRESTMONT_X:

and now it's super obvious why they're grouped.

> > + pr_cont("Crestmont events, ");

2023-06-06 16:30:08

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest



On 2023-06-06 9:24 a.m., Peter Zijlstra wrote:
> On Tue, Jun 06, 2023 at 08:42:42AM -0400, Liang, Kan wrote:
>> Hi Peter,
>>
>> On 2023-05-22 7:30 a.m., [email protected] wrote:
>>> From: Kan Liang <[email protected]>
>>>
>>> The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
>>> both have Crestmont core. From the core PMU's perspective, they are
>>> similar to the e-core of MTL. The only difference is the LBR event
>>> logging feature, which will be implemented in the following patches.
>>>
>>> Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
>>>
>>> Reviewed-by: Andi Kleen <[email protected]>
>>> Signed-off-by: Kan Liang <[email protected]>
>>> ---
>>>
>>
>>
>> Gentle ping.
>>
>> Do you have any comments for the patch set?
>>
>> The patch set based on the perf/core branch which doesn't
>> include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
>> update on AMD BRS").
>> https://lore.kernel.org/lkml/[email protected]/
>>
>> Should I rebase it on the perf/urgent and send the V3?
>>
>
> I can pull urgent into perf/core, but:

Thanks.

>
>>> + case INTEL_FAM6_GRANDRIDGE:
>>> + case INTEL_FAM6_SIERRAFOREST_X:
> ^^^^^^^^^^^^^^^
>
> Those are just plain wrong; please fix up the intel-family.h thing like
> suggested earlier in this thread.
>> And Tony, please no more of that platform name nonsense.. we want uarch
> names for a reason, so that enums like the above become something
> sensible like:
>
> case INTEL_FAM6_ATOM_CRESTMONT:
> case INTEL_FAM6_ATOM_CRESTMONT_X:
>
> and now it's super obvious why they're grouped.
>
>>> + pr_cont("Crestmont events, ");

The Sierra Forest should not be a platform name. I think it's the code
name of the processor.

The problem is that the uarch name doesn't work for the hybrid, since it
has different uarchs in the same processors. To make the naming rules
consistent among big core, atom, and hybrid, maybe we should use the
code name of the processor in intel-family.h.

I will propose a patch to update the rules of using the processor name.
I think we may want to have further discussion there.

Thanks,
Kan

2023-06-06 18:33:46

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:

> > names for a reason, so that enums like the above become something
> > sensible like:
> >
> > case INTEL_FAM6_ATOM_CRESTMONT:
> > case INTEL_FAM6_ATOM_CRESTMONT_X:
> >
> > and now it's super obvious why they're grouped.
> >
> >>> + pr_cont("Crestmont events, ");
>
> The Sierra Forest should not be a platform name. I think it's the code
> name of the processor.
>
> The problem is that the uarch name doesn't work for the hybrid, since it
> has different uarchs in the same processors. To make the naming rules
> consistent among big core, atom, and hybrid, maybe we should use the
> code name of the processor in intel-family.h.

I obviously disagree; these are not hybrid and calling them both
CRESTMONT makes *far* more sense than the random gibberish they're
called now.

Yes, hybrid made a complete mess of things (in many ways), but we should
then not do the obvious correct thing for when we can.

2023-06-06 18:46:52

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest



On 2023-06-06 2:17 p.m., Peter Zijlstra wrote:
> On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:
>
>>> names for a reason, so that enums like the above become something
>>> sensible like:
>>>
>>> case INTEL_FAM6_ATOM_CRESTMONT:
>>> case INTEL_FAM6_ATOM_CRESTMONT_X:
>>>
>>> and now it's super obvious why they're grouped.
>>>
>>>>> + pr_cont("Crestmont events, ");
>>
>> The Sierra Forest should not be a platform name. I think it's the code
>> name of the processor.
>>
>> The problem is that the uarch name doesn't work for the hybrid, since it
>> has different uarchs in the same processors. To make the naming rules
>> consistent among big core, atom, and hybrid, maybe we should use the
>> code name of the processor in intel-family.h.
>
> I obviously disagree; these are not hybrid and calling them both
> CRESTMONT makes *far* more sense than the random gibberish they're
> called now.
>
> Yes, hybrid made a complete mess of things (in many ways), but we should
> then not do the obvious correct thing for when we can.

Besides hybrid, it seems there is a bigger problem for the big core.

The big core uses the processor code name since Ice Lake. In the perf
code, we also uses the processor code name for the big core.
pr_cont("Icelake events, ");

Is it OK to leave the big core as is (using processor code name), but
only change the name for Grand Ridge and Sierra Forest?

Thanks,
Kan

2023-06-06 19:50:41

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Tue, Jun 06, 2023 at 02:34:47PM -0400, Liang, Kan wrote:
>
>
> On 2023-06-06 2:17 p.m., Peter Zijlstra wrote:
> > On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:
> >
> >>> names for a reason, so that enums like the above become something
> >>> sensible like:
> >>>
> >>> case INTEL_FAM6_ATOM_CRESTMONT:
> >>> case INTEL_FAM6_ATOM_CRESTMONT_X:
> >>>
> >>> and now it's super obvious why they're grouped.
> >>>
> >>>>> + pr_cont("Crestmont events, ");
> >>
> >> The Sierra Forest should not be a platform name. I think it's the code
> >> name of the processor.
> >>
> >> The problem is that the uarch name doesn't work for the hybrid, since it
> >> has different uarchs in the same processors. To make the naming rules
> >> consistent among big core, atom, and hybrid, maybe we should use the
> >> code name of the processor in intel-family.h.
> >
> > I obviously disagree; these are not hybrid and calling them both
> > CRESTMONT makes *far* more sense than the random gibberish they're
> > called now.
> >
> > Yes, hybrid made a complete mess of things (in many ways), but we should
> > then not do the obvious correct thing for when we can.
>
> Besides hybrid, it seems there is a bigger problem for the big core.
>
> The big core uses the processor code name since Ice Lake. In the perf
> code, we also uses the processor code name for the big core.
> pr_cont("Icelake events, ");

Yeah, it's a mess :/ Ideally that would print "Sunny Cove", but I think
there's userspace looking at that string :-(

> Is it OK to leave the big core as is (using processor code name), but
> only change the name for Grand Ridge and Sierra Forest?

Arguably we should also rename ALDERLAKE_N to ATOM_GRACEMONT


We should also do something about that whole hybrid init thing, the
meteorlake stuff is quite a mess as well. Perhaps we can add hybrid_pmu
next to intel_pmu to have a better start in life for x86_pmu.

Anyway, we should really try not to make a bigger mess and try and clean
up where we can.

2023-06-06 20:06:40

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest



On 2023-06-06 3:37 p.m., Peter Zijlstra wrote:
> On Tue, Jun 06, 2023 at 02:34:47PM -0400, Liang, Kan wrote:
>>
>>
>> On 2023-06-06 2:17 p.m., Peter Zijlstra wrote:
>>> On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:
>>>
>>>>> names for a reason, so that enums like the above become something
>>>>> sensible like:
>>>>>
>>>>> case INTEL_FAM6_ATOM_CRESTMONT:
>>>>> case INTEL_FAM6_ATOM_CRESTMONT_X:
>>>>>
>>>>> and now it's super obvious why they're grouped.
>>>>>
>>>>>>> + pr_cont("Crestmont events, ");
>>>>
>>>> The Sierra Forest should not be a platform name. I think it's the code
>>>> name of the processor.
>>>>
>>>> The problem is that the uarch name doesn't work for the hybrid, since it
>>>> has different uarchs in the same processors. To make the naming rules
>>>> consistent among big core, atom, and hybrid, maybe we should use the
>>>> code name of the processor in intel-family.h.
>>>
>>> I obviously disagree; these are not hybrid and calling them both
>>> CRESTMONT makes *far* more sense than the random gibberish they're
>>> called now.
>>>
>>> Yes, hybrid made a complete mess of things (in many ways), but we should
>>> then not do the obvious correct thing for when we can.
>>
>> Besides hybrid, it seems there is a bigger problem for the big core.
>>
>> The big core uses the processor code name since Ice Lake. In the perf
>> code, we also uses the processor code name for the big core.
>> pr_cont("Icelake events, ");
>
> Yeah, it's a mess :/ Ideally that would print "Sunny Cove", but I think
> there's userspace looking at that string :-(

Yes, for the existing names, we probably cannot change it. I will try to
only use the micro-architecture name for the future platforms in perf.

>
>> Is it OK to leave the big core as is (using processor code name), but
>> only change the name for Grand Ridge and Sierra Forest?
>
> Arguably we should also rename ALDERLAKE_N to ATOM_GRACEMONT
>
>
> We should also do something about that whole hybrid init thing, the
> meteorlake stuff is quite a mess as well. Perhaps we can add hybrid_pmu
> next to intel_pmu to have a better start in life for x86_pmu.
>

I will think about it and try to clean up the hybrid init.

> Anyway, we should really try not to make a bigger mess and try and clean
> up where we can.

Sure.

Thanks,
Kan

2023-06-07 21:53:38

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

> > Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
>
> Moo... Tony, did you sneak product names instead of uarch names in the
> intel-family thing again?
>
> That is; I'm thinking we want the below, no?
>
> -#define INTEL_FAM6_SIERRAFOREST_X 0xAF
> -
> -#define INTEL_FAM6_GRANDRIDGE 0xB6
> +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
> +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */

I don't think that's really any more helpful.

Using the code name of the model makes it easy to look things
up in ark.intel.com. Using the "core" name doesn't even work for
hybrid CPU models which have more than one core type.
So I'd like to keep it as it is.

But if you want to change to the core name, then please just
do it now.

There are folks internally worried that all upstream work for
these two CPU models is going to be blocked while this
is discussed.

-Tony

2023-06-08 07:44:16

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Wed, Jun 07, 2023 at 09:43:57PM +0000, Luck, Tony wrote:
> > > Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
> >
> > Moo... Tony, did you sneak product names instead of uarch names in the
> > intel-family thing again?
> >
> > That is; I'm thinking we want the below, no?
> >
> > -#define INTEL_FAM6_SIERRAFOREST_X 0xAF
> > -
> > -#define INTEL_FAM6_GRANDRIDGE 0xB6
> > +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
> > +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
>
> I don't think that's really any more helpful.

Well, it clearly shows why these two are grouped together. They are the
same bloody uarch. OTOH 'Sierra Forest' and 'Grand Ridge' is just random
gibberish that nobody can relate without looking up some website.

> Using the code name of the model makes it easy to look things
> up in ark.intel.com. Using the "core" name doesn't even work for
> hybrid CPU models which have more than one core type.
> So I'd like to keep it as it is.

ark.intel.com is a travesty anyway... if I get it as a result to a
Google query I will almost always avoid it because it is not useful.

Wikipedia and Wikichip are by far more useful.

> But if you want to change to the core name, then please just
> do it now.
>
> There are folks internally worried that all upstream work for
> these two CPU models is going to be blocked while this
> is discussed.

Then I'm hoping their take-away is that random gibberish names don't
help anybody. The whole Intel naming scheme is impenetrable crap.

2023-06-08 16:35:47

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

> Then I'm hoping their take-away is that random gibberish names don't
> help anybody. The whole Intel naming scheme is impenetrable crap.

> > +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
> > +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */

This just adds another layer of confusion. Sure, these two models are based
on the same core. But giving the illusion that they are somehow the same will
lead to tears before bedtime:

1) They each took a snapshot of that core design on different dates, so there
are logic differences.
2) Feature fuses will be different
3) Microcode will be different
4) BIOS will be different
5) "uncore" is different, so anything implemented outside of the core
will be different.

-Tony



2023-06-29 22:41:30

by Luck, Tony

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Thu, Jun 08, 2023 at 04:20:22PM +0000, Luck, Tony wrote:
> > Then I'm hoping their take-away is that random gibberish names don't
> > help anybody. The whole Intel naming scheme is impenetrable crap.
>
> > > +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
> > > +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
>
> This just adds another layer of confusion. Sure, these two models are based
> on the same core. But giving the illusion that they are somehow the same will
> lead to tears before bedtime:
>
> 1) They each took a snapshot of that core design on different dates, so there
> are logic differences.
> 2) Feature fuses will be different
> 3) Microcode will be different
> 4) BIOS will be different
> 5) "uncore" is different, so anything implemented outside of the core
> will be different.

This thread stalled. But the internal conversation continued. There
seems a strong argument that enough things changed when Xeon-izing
the core to go into Sierra Forest that using Crestmont will cause
confusion in more places than it helps. There seem to be some internal
folks using an entirely different name for this core (which I won't
repeat here, but some of the usual external sites have mentions of
this other name).

Can we just keep:

#define INTEL_FAM6_SIERRAFOREST_X 0xAF

and move on to more interesting things?

-Tony

2023-08-02 15:16:01

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest

On Thu, Jun 08, 2023 at 04:20:22PM +0000, Luck, Tony wrote:
> > Then I'm hoping their take-away is that random gibberish names don't
> > help anybody. The whole Intel naming scheme is impenetrable crap.
>
> > > +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
> > > +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
>
> This just adds another layer of confusion. Sure, these two models are based
> on the same core. But giving the illusion that they are somehow the same will
> lead to tears before bedtime:
>
> 1) They each took a snapshot of that core design on different dates, so there
> are logic differences.
> 2) Feature fuses will be different
> 3) Microcode will be different
> 4) BIOS will be different
> 5) "uncore" is different, so anything implemented outside of the core
> will be different.

All those things are true for INTEL_FAM6_SKYLAKE vs INTEL_FAM6_SKYLAKE_X
and all the other pre-hybrid desktop/server parts.

And we used to do the same with the previous ATOM things, see GOLDMONT /
GOLDMONT_D, TREMONT / TREMONT_D etc..

So why should this atom be treated differently? We get a server atom and
a client atom, yes they different in all the usual way, but they still
more similar to one another than to any other random chip we have.

In short, we used to have this for core parts, we used to have this for
atom parts, but now we magically need to break from it?

Anyway, let me do the rename and squish everything into a git tree.


Subject: [tip: perf/core] perf/x86/intel: Add Crestmont PMU

The following commit has been merged into the perf/core branch of tip:

Commit-ID: a430021faad6b4fa86c820fc3e7f8dbfc2f14fb4
Gitweb: https://git.kernel.org/tip/a430021faad6b4fa86c820fc3e7f8dbfc2f14fb4
Author: Kan Liang <[email protected]>
AuthorDate: Mon, 22 May 2023 04:30:35 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 09 Aug 2023 21:51:07 +02:00

perf/x86/intel: Add Crestmont PMU

The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
both have Crestmont core. From the core PMU's perspective, they are
similar to the e-core of MTL. The only difference is the LBR event
logging feature, which will be implemented in the following patches.

Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/events/intel/core.c | 52 ++++++++++++++++++++++++++++++++++-
arch/x86/events/intel/ds.c | 9 ++++--
arch/x86/events/perf_event.h | 2 +-
3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c7e7ed6..64a3533 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};

+EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_tnt),
+ EVENT_PTR(td_retiring_cmt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_tnt),
+ NULL
+};
+
static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@@ -4840,6 +4851,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");

PMU_FORMAT_ATTR(frontend, "config1:0-23");

+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -4870,6 +4883,13 @@ static struct attribute *slm_format_attr[] = {
NULL
};

+static struct attribute *cmt_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ &format_attr_snoop_rsp.attr,
+ NULL
+};
+
static struct attribute *skl_format_attr[] = {
&format_attr_frontend.attr,
NULL,
@@ -5649,7 +5669,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL
};

-PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);

static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@@ -6197,6 +6216,37 @@ __init int intel_pmu_init(void)
name = "gracemont";
break;

+ case INTEL_FAM6_ATOM_CRESTMONT:
+ case INTEL_FAM6_ATOM_CRESTMONT_X:
+ x86_pmu.mid_ack = true;
+ memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_cmt_extra_regs;
+
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.pebs_block = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = mtl_latency_data_small;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ x86_pmu.limit_period = spr_limit_period;
+ td_attr = cmt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Crestmont events, ");
+ name = "crestmont";
+ break;
+
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index df88576..eb8dd8b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
__intel_pmu_pebs_data_source_grt(data_source);
}

-static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
{
data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)

data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
- intel_pmu_pebs_data_source_cmt(data_source);
+ __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
+void __init intel_pmu_pebs_data_source_cmt(void)
+{
+ __intel_pmu_pebs_data_source_cmt(pebs_data_source);
}

static u64 precise_store_data(u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index d6de448..c8ba2be 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);

void intel_pmu_pebs_data_source_mtl(void);

+void intel_pmu_pebs_data_source_cmt(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);