2020-04-01 17:52:55

by Reinette Chatre

[permalink] [raw]
Subject: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

The original Memory Bandwidth Monitoring (MBM) architectural
definition defines counters of up to 62 bits in the
IA32_QM_CTR MSR while the first-generation MBM implementation
uses statically defined 24 bit counters.

Expand the MBM CPUID enumeration properties to include the MBM
counter width. The previously unused EAX output register contains,
in bits [7:0], the MBM counter width encoded as an offset from
24 bits. Enumerating this property is only specified for Intel
CPUs. A vendor check is added to ensure original behavior is
maintained for AMD CPUs.

While eight bits are available for the counter width offset, the
IA32_QM_CTR MSR only supports 62 bit counters. A sanity check,
with a warning printed when encountered, is added to ensure counters
cannot exceed the 62 bit limit.

Signed-off-by: Reinette Chatre <[email protected]>

---
Dear Maintainers,

Because of the need for a vendor check there seems to be a few ways
in which enumerating this new MBM property can be implemented. While
this implementation seems to be the simplest it does introduce an
explicit vendor check into arch/x86/kernel/cpu/common.c where there
does not seem to be a precedent for such a change.

The goal is to support this correctly and I look forward to your
guidance on how to enable this feature correctly.

Regards,

Reinette

arch/x86/include/asm/processor.h | 1 +
arch/x86/kernel/cpu/common.c | 5 +++++
arch/x86/kernel/cpu/resctrl/internal.h | 8 +++++++-
arch/x86/kernel/cpu/resctrl/monitor.c | 8 +++++++-
4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 09705ccc393c..69d957486f25 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -115,6 +115,7 @@ struct cpuinfo_x86 {
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4cdb123ff66a..8552d2fadc15 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -856,6 +856,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c)

static void init_cqm(struct cpuinfo_x86 *c)
{
+ c->x86_cache_mbm_width_offset = -1;
+
if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
c->x86_cache_max_rmid = -1;
c->x86_cache_occ_scale = -1;
@@ -875,6 +877,9 @@ static void init_cqm(struct cpuinfo_x86 *c)

c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
+ /* EAX contents is only defined for Intel CPUs */
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ c->x86_cache_mbm_width_offset = eax & 0xff;
}
}

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 28947c790faa..3a16d1c0ff40 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -31,7 +31,7 @@

#define CQM_LIMBOCHECK_INTERVAL 1000

-#define MBM_CNTR_WIDTH 24
+#define MBM_CNTR_WIDTH_BASE 24
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
@@ -40,6 +40,12 @@

#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+/*
+ * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
+ * data to be returned. The counter width is discovered from the hardware
+ * as an offset from MBM_CNTR_WIDTH_BASE.
+ */
+#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)


struct rdt_fs_context {
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index df964c03f6c6..837d7d012b7b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -618,12 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r)

int rdt_get_mon_l3_config(struct rdt_resource *r)
{
+ unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;

r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
- r->mbm_width = MBM_CNTR_WIDTH;
+ r->mbm_width = MBM_CNTR_WIDTH_BASE;
+
+ if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
+ r->mbm_width += mbm_offset;
+ else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
+ pr_warn("Ignoring impossible MBM counter offset\n");

/*
* A reasonable upper limit on the max threshold is the number
--
2.21.0


2020-04-03 00:09:07

by Fenghua Yu

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

On Wed, Apr 01, 2020 at 10:51:02AM -0700, Reinette Chatre wrote:
> The original Memory Bandwidth Monitoring (MBM) architectural
> definition defines counters of up to 62 bits in the
> IA32_QM_CTR MSR while the first-generation MBM implementation
> uses statically defined 24 bit counters.
>
> @@ -856,6 +856,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
>
> static void init_cqm(struct cpuinfo_x86 *c)
> {
> + c->x86_cache_mbm_width_offset = -1;
> +
> if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
> c->x86_cache_max_rmid = -1;
> c->x86_cache_occ_scale = -1;
> @@ -875,6 +877,9 @@ static void init_cqm(struct cpuinfo_x86 *c)
>
> c->x86_cache_max_rmid = ecx;
> c->x86_cache_occ_scale = ebx;
> + /* EAX contents is only defined for Intel CPUs */
> + if (c->x86_vendor == X86_VENDOR_INTEL)
> + c->x86_cache_mbm_width_offset = eax & 0xff;

Is it reliable to read eax which is reserved on older platforms that
don't support the feature?

Seems the code assumes the reserved eax is 0 on those platforms. Is it
reliable?

> int rdt_get_mon_l3_config(struct rdt_resource *r)
> {
> + unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
> unsigned int cl_size = boot_cpu_data.x86_cache_size;
> int ret;
>
> r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
> r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
> - r->mbm_width = MBM_CNTR_WIDTH;
> + r->mbm_width = MBM_CNTR_WIDTH_BASE;
> +
> + if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
> + r->mbm_width += mbm_offset;
> + else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
> + pr_warn("Ignoring impossible MBM counter offset\n");
>

Thanks.

-Fenghua

2020-04-03 15:33:49

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

Hi Fenghua,

On 4/2/2020 5:05 PM, Fenghua Yu wrote:
> On Wed, Apr 01, 2020 at 10:51:02AM -0700, Reinette Chatre wrote:
>> The original Memory Bandwidth Monitoring (MBM) architectural
>> definition defines counters of up to 62 bits in the
>> IA32_QM_CTR MSR while the first-generation MBM implementation
>> uses statically defined 24 bit counters.
>>
>> @@ -856,6 +856,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
>>
>> static void init_cqm(struct cpuinfo_x86 *c)
>> {
>> + c->x86_cache_mbm_width_offset = -1;
>> +
>> if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
>> c->x86_cache_max_rmid = -1;
>> c->x86_cache_occ_scale = -1;
>> @@ -875,6 +877,9 @@ static void init_cqm(struct cpuinfo_x86 *c)
>>
>> c->x86_cache_max_rmid = ecx;
>> c->x86_cache_occ_scale = ebx;
>> + /* EAX contents is only defined for Intel CPUs */
>> + if (c->x86_vendor == X86_VENDOR_INTEL)
>> + c->x86_cache_mbm_width_offset = eax & 0xff;
>
> Is it reliable to read eax which is reserved on older platforms that
> don't support the feature?

The new ISE specification contains an architectural redefinition of EAX.

>
> Seems the code assumes the reserved eax is 0 on those platforms. Is it
> reliable?

Testing on BDW, SKX, and CLX confirmed that EAX is 0. This addition thus
results in no functional change on these systems with them continuing to
use the original MBM width of 24.

Reinette

2020-04-29 18:13:58

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

On Wed, Apr 01, 2020 at 10:51:02AM -0700, Reinette Chatre wrote:
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 4cdb123ff66a..8552d2fadc15 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -856,6 +856,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
>
> static void init_cqm(struct cpuinfo_x86 *c)
> {
> + c->x86_cache_mbm_width_offset = -1;
> +
> if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
> c->x86_cache_max_rmid = -1;
> c->x86_cache_occ_scale = -1;
> @@ -875,6 +877,9 @@ static void init_cqm(struct cpuinfo_x86 *c)
>
> c->x86_cache_max_rmid = ecx;
> c->x86_cache_occ_scale = ebx;
> + /* EAX contents is only defined for Intel CPUs */
> + if (c->x86_vendor == X86_VENDOR_INTEL)
> + c->x86_cache_mbm_width_offset = eax & 0xff;

Remind me again pls why is all this RDT stuff replicated per CPU instead
of it being properly detected somewhere down in resctrl_late_init()?

Looking at get_rdt_resources(), it kinda wants to have that in there
too?

IOW, how about moving all that gunk in init_cqm() to resctrl/ where it
truly belongs? Doesn't have to be this patchset but this patchset can
start moving it...

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-04-29 18:46:36

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

Hi Borislav,

On 4/29/2020 11:11 AM, Borislav Petkov wrote:
> On Wed, Apr 01, 2020 at 10:51:02AM -0700, Reinette Chatre wrote:
>> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
>> index 4cdb123ff66a..8552d2fadc15 100644
>> --- a/arch/x86/kernel/cpu/common.c
>> +++ b/arch/x86/kernel/cpu/common.c
>> @@ -856,6 +856,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
>>
>> static void init_cqm(struct cpuinfo_x86 *c)
>> {
>> + c->x86_cache_mbm_width_offset = -1;
>> +
>> if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
>> c->x86_cache_max_rmid = -1;
>> c->x86_cache_occ_scale = -1;
>> @@ -875,6 +877,9 @@ static void init_cqm(struct cpuinfo_x86 *c)
>>
>> c->x86_cache_max_rmid = ecx;
>> c->x86_cache_occ_scale = ebx;
>> + /* EAX contents is only defined for Intel CPUs */
>> + if (c->x86_vendor == X86_VENDOR_INTEL)
>> + c->x86_cache_mbm_width_offset = eax & 0xff;
>
> Remind me again pls why is all this RDT stuff replicated per CPU instead
> of it being properly detected somewhere down in resctrl_late_init()?
>
> Looking at get_rdt_resources(), it kinda wants to have that in there
> too?
>
> IOW, how about moving all that gunk in init_cqm() to resctrl/ where it
> truly belongs? Doesn't have to be this patchset but this patchset can
> start moving it...

I am not familiar with the history to clarify to you why the RDT feature
enumeration is not consistent. Even so, there was an effort to
consolidate this in [1] but it was found to go against the goal of
centralizing the CPUID information [2] and was not pursued further. I
would be happy to revisit this if this is the direction that you prefer.
This would essentially be resubmitting [1] though. Do you expect that
this change would receive a different reception at this time?

Thank you

Reinette


[1]
https://lore.kernel.org/lkml/[email protected]/

[2]
https://lore.kernel.org/lkml/[email protected]/




2020-04-30 10:03:19

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

On Wed, Apr 29, 2020 at 11:42:03AM -0700, Reinette Chatre wrote:
> This would essentially be resubmitting [1] though. Do you expect that
> this change would receive a different reception at this time?

Right, Thomas and I talked it over a bit last night. So the proper
thing to do is to read all that information *once* and put it in
boot_cpu_data. Because that information is replicated the same over
CPUID on each core. If there's per-CPU stuff, then it should remain
per-CPU but looking at how the RDT code uses boot_cpu_data, I'd say this
is global info.

So, it should be parsed once on the BSP during boot and put into
boot_cpu_data. And then silly stuff like x86_init_cache_qos() should go
away too.

If this info is needed on Intel only, then it should be parsed in
cpu/intel.c, in a ->c_bsp_init helper and if it is needed on AMD too,
then a function which does this should be called by the respective
c_bsp_init helper.

Then all its users can continue reading it out of boot_cpu_data and
future RDT hw info can be added there.

Makes sense?

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-05-03 18:56:06

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

Hi Borislav,

On 4/30/2020 2:59 AM, Borislav Petkov wrote:
> On Wed, Apr 29, 2020 at 11:42:03AM -0700, Reinette Chatre wrote:
>> This would essentially be resubmitting [1] though. Do you expect that
>> this change would receive a different reception at this time?
>
> Right, Thomas and I talked it over a bit last night. So the proper
> thing to do is to read all that information *once* and put it in
> boot_cpu_data. Because that information is replicated the same over
> CPUID on each core. If there's per-CPU stuff, then it should remain
> per-CPU but looking at how the RDT code uses boot_cpu_data, I'd say this
> is global info.
>
> So, it should be parsed once on the BSP during boot and put into
> boot_cpu_data. And then silly stuff like x86_init_cache_qos() should go
> away too.

You are correct. I looked through the hardware errata and confirmed with
a few people very knowledgeable about the history and technical details
of CMT that x86_init_cache_qos() is not necessary. There exists no
platform where CPUs that support CMT either do not report an
RMID or report different RMIDs.

Also, for the existing implementation moving init_cqm() to
early_identify_cpu() makes sense for all the reasons you mention.

I am struggling with what should follow ...

>
> If this info is needed on Intel only, then it should be parsed in
> cpu/intel.c, in a ->c_bsp_init helper and if it is needed on AMD too,
> then a function which does this should be called by the respective
> c_bsp_init helper.

Using c_bsp_init may be needed to obtain the Intel-only property that
the patch that started this originally attempted to do. AMD and Intel
support the same CPUID leaf and two sub-leaves ... only differing in the
one new register that is defined for Intel but undefined for AMD.

I am concerned with how I am interpreting your suggestion here since my
interpretation does end up with duplicate code between the two
c_bsp_init helpers. Below is this snippet - is this how you envisioned
this change? (Please note that in this snippet you would find init_cqm()
moved from early_identify_cpu(), this change is thus done with the
assumption that your earlier suggestions have all been applied already.)

diff --git a/arch/x86/include/asm/processor.h
b/arch/x86/include/asm/processor.h
index 3bcf27caf6c9..76f86fdb02af 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -116,6 +116,7 @@ struct cpuinfo_x86 {
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 547ad7bbf0e0..2e6c718ba793 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -512,6 +512,38 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c)
#endif
}

+/*
+ * Significant overlap with the Intel initialization found in
+ * init_llc_monitoring_intel() since CPUID leaf 0xF subleaf 0x1
+ * differ in all but one register (used to initialize
+ * x86_cache_mbm_width_offset) that is undefined on AMD.
+ */
+static void init_llc_monitoring_amd(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ c->x86_cache_mbm_width_offset = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+ c->x86_cache_mbm_width_offset = -1;
+ }
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{

@@ -597,6 +629,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
}
}
+
+ init_llc_monitoring_amd(c);
}

static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3151a366b0a8..d07809286b95 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -854,30 +854,6 @@ static void init_speculation_control(struct
cpuinfo_x86 *c)
}
}

-static void init_cqm(struct cpuinfo_x86 *c)
-{
- if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- return;
- }
-
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = cpuid_ebx(0xf);
-
- if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
- u32 eax, ebx, ecx, edx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
-
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
-}
-
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -1212,7 +1188,6 @@ static void __init early_identify_cpu(struct
cpuinfo_x86 *c)

c->cpu_index = 0;
filter_cpuid_features(c, false);
- init_cqm(c);

if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index bf08d4508ecb..0775090bd5e2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -322,6 +322,46 @@ static void early_init_intel(struct cpuinfo_x86 *c)
detect_ht_early(c);
}

+/*
+ * Significant overlap with the AMD initialization found in
+ * init_llc_monitoring_amd() since CPUID leaf 0xF subleaf 0x1
+ * differ in all but one register (used to initialize
+ * x86_cache_mbm_width_offset) that is undefined on AMD.
+ */
+static void init_llc_monitoring_intel(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ c->x86_cache_mbm_width_offset = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+ c->x86_cache_mbm_width_offset = eax & 0xff;
+ }
+}
+
+/*
+ * Initialization work to be done only once during boot.
+ */
+static void bsp_init_intel(struct cpuinfo_x86 *c)
+{
+ init_llc_monitoring_intel(c);
+}
+
#ifdef CONFIG_X86_32
/*
* Early probe support logic for ppro memory erratum #50
@@ -961,6 +1001,7 @@ static const struct cpu_dev intel_cpu_dev = {
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
+ .c_bsp_init = bsp_init_intel,
.c_init = init_intel,
.c_x86_vendor = X86_VENDOR_INTEL,
};

>
> Then all its users can continue reading it out of boot_cpu_data and
> future RDT hw info can be added there.

Understood.

Thank you very much

Reinette

2020-05-04 07:58:51

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

Hi,

On Sun, May 03, 2020 at 11:51:00AM -0700, Reinette Chatre wrote:
> I am struggling with what should follow ...

Since a diff is better than a thousand words :-) see below.

It builds here with my .config but I'm sure it still needs work but you
get the idea. When you do the final version, please split it properly as
in:

- one patch does this:

rename from arch/x86/include/asm/resctrl_sched.h
rename to arch/x86/include/asm/resctrl.h

- the other adds resctrl_cpu_detect()

- last one removes init_cqm() and x86_init_cache_qos(), etc.

Thx.

---
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3eeaaeb75638..29ee0c088009 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -113,9 +113,10 @@ struct cpuinfo_x86 {
/* in KB - valid for CPUS which support this call: */
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
+ /* Cache QoS architectural values, valid only on the BSP: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl.h
similarity index 95%
rename from arch/x86/include/asm/resctrl_sched.h
rename to arch/x86/include/asm/resctrl.h
index f6b7fe2833cc..c8a27cbbdae2 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -84,9 +84,12 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}

+void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+
#else

static inline void resctrl_sched_in(void) {}
+static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}

#endif /* CONFIG_X86_CPU_RESCTRL */

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 547ad7bbf0e0..c36e89930965 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -18,6 +18,7 @@
#include <asm/pci-direct.h>
#include <asm/delay.h>
#include <asm/debugreg.h>
+#include <asm/resctrl.h>

#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
}
}
+
+ resctrl_cpu_detect(c);
}

static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index eab3ebd22927..74682b8d09b0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -877,30 +877,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}

-static void init_cqm(struct cpuinfo_x86 *c)
-{
- if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- return;
- }
-
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = cpuid_ebx(0xf);
-
- if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
- u32 eax, ebx, ecx, edx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
-
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
-}
-
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -968,7 +944,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)

init_scattered_cpuid_features(c);
init_speculation_control(c);
- init_cqm(c);

/*
* Clear/Set all flags overridden by options, after probe.
@@ -1400,20 +1375,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
#endif
}

-static void x86_init_cache_qos(struct cpuinfo_x86 *c)
-{
- /*
- * The heavy lifting of max_rmid and cache_occ_scale are handled
- * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
- * in case CQM bits really aren't there in this CPU.
- */
- if (c != &boot_cpu_data) {
- boot_cpu_data.x86_cache_max_rmid =
- min(boot_cpu_data.x86_cache_max_rmid,
- c->x86_cache_max_rmid);
- }
-}
-
/*
* Validate that ACPI/mptables have the same information about the
* effective APIC id and update the package map.
@@ -1526,7 +1487,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif

x86_init_rdrand(c);
- x86_init_cache_qos(c);
setup_pku(c);

/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a19a680542ce..166d7c355896 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -22,6 +22,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
+#include <asm/resctrl.h>

#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
detect_ht_early(c);
}

+static void bsp_init_intel(struct cpuinfo_x86 *c)
+{
+ resctrl_cpu_detect(c);
+}
+
#ifdef CONFIG_X86_32
/*
* Early probe support logic for ppro memory erratum #50
@@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
+ .c_bsp_init = bsp_init_intel,
.c_init = init_intel,
.c_x86_vendor = X86_VENDOR_INTEL,
};
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index d8cc5223b7ce..5e5955aa6593 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -22,7 +22,7 @@
#include <linux/cpuhotplug.h>

#include <asm/intel-family.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include "internal.h"

/* Mutex to protect rdtgroup access. */
@@ -958,6 +958,35 @@ static __init void rdt_init_res_defs(void)

static enum cpuhp_state rdt_online;

+/* Runs once on the BSP during boot. */
+void resctrl_cpu_detect(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ c->x86_cache_mbm_width_offset = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ c->x86_cache_mbm_width_offset = eax & 0xff;
+ }
+}
+
static int __init resctrl_late_init(void)
{
struct rdt_resource *r;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d7623e1b927d..4bd28b388a1a 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -24,7 +24,7 @@

#include <asm/cacheflush.h>
#include <asm/intel-family.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/perf_event.h>

#include "../../events/perf_event.h" /* For X86_CONFIG() */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5a359d9fcc05..6276ae015945 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -29,7 +29,7 @@

#include <uapi/linux/magic.h>

-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include "internal.h"

DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5ef9d8f25b0e..0c169a5687e1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-05-05 04:23:28

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

Hi Borislav,

On 5/4/2020 12:56 AM, Borislav Petkov wrote:
> Hi,
>
> On Sun, May 03, 2020 at 11:51:00AM -0700, Reinette Chatre wrote:
>> I am struggling with what should follow ...
>
> Since a diff is better than a thousand words :-) see below.
>

Thank you so much for providing the details. Your explanation is clear
to me but I do have one clarification question ...


> @@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
> x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
> }
> }
> +
> + resctrl_cpu_detect(c);
> }
>

...

> @@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
> detect_ht_early(c);
> }
>
> +static void bsp_init_intel(struct cpuinfo_x86 *c)
> +{
> + resctrl_cpu_detect(c);
> +}
> +
> #ifdef CONFIG_X86_32
> /*
> * Early probe support logic for ppro memory erratum #50
> @@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
> #endif
> .c_detect_tlb = intel_detect_tlb,
> .c_early_init = early_init_intel,
> + .c_bsp_init = bsp_init_intel,
> .c_init = init_intel,
> .c_x86_vendor = X86_VENDOR_INTEL,
> };
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index d8cc5223b7ce..5e5955aa6593 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -22,7 +22,7 @@
> #include <linux/cpuhotplug.h>
>
> #include <asm/intel-family.h>
> -#include <asm/resctrl_sched.h>
> +#include <asm/resctrl.h>
> #include "internal.h"
>
> /* Mutex to protect rdtgroup access. */
> @@ -958,6 +958,35 @@ static __init void rdt_init_res_defs(void)
>
> static enum cpuhp_state rdt_online;
>
> +/* Runs once on the BSP during boot. */
> +void resctrl_cpu_detect(struct cpuinfo_x86 *c)
> +{
> + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
> + c->x86_cache_max_rmid = -1;
> + c->x86_cache_occ_scale = -1;
> + c->x86_cache_mbm_width_offset = -1;
> + return;
> + }
> +
> + /* will be overridden if occupancy monitoring exists */
> + c->x86_cache_max_rmid = cpuid_ebx(0xf);
> +
> + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
> + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
> + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
> + u32 eax, ebx, ecx, edx;
> +
> + /* QoS sub-leaf, EAX=0Fh, ECX=1 */
> + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
> +
> + c->x86_cache_max_rmid = ecx;
> + c->x86_cache_occ_scale = ebx;
> +
> + if (c->x86_vendor == X86_VENDOR_INTEL)
> + c->x86_cache_mbm_width_offset = eax & 0xff;
> + }
> +}
> +

resctrl_cpu_detect() is now identical among vendors. Do we still need
the c_bsp_init helpers? Could we not perhaps call resctrl_cpu_detect()
directly from early_identify_cpu()?

Thank you

Reinette

2020-05-05 22:17:49

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH 2/2] x86/resctrl: Support CPUID enumeration of MBM counter width

Hi Borislav,

On 5/4/2020 9:19 PM, Reinette Chatre wrote:
> On 5/4/2020 12:56 AM, Borislav Petkov wrote:
>> Hi,
>>
>> On Sun, May 03, 2020 at 11:51:00AM -0700, Reinette Chatre wrote:
>>> I am struggling with what should follow ...
>>
>> Since a diff is better than a thousand words :-) see below.
>>
>
> Thank you so much for providing the details. Your explanation is clear
> to me but I do have one clarification question ...
>
>
>> @@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
>> x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
>> }
>> }
>> +
>> + resctrl_cpu_detect(c);
>> }
>>
>
> ...
>
>> @@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
>> detect_ht_early(c);
>> }
>>
>> +static void bsp_init_intel(struct cpuinfo_x86 *c)
>> +{
>> + resctrl_cpu_detect(c);
>> +}
>> +
>> #ifdef CONFIG_X86_32
>> /*
>> * Early probe support logic for ppro memory erratum #50
>> @@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
>> #endif
>> .c_detect_tlb = intel_detect_tlb,
>> .c_early_init = early_init_intel,
>> + .c_bsp_init = bsp_init_intel,
>> .c_init = init_intel,
>> .c_x86_vendor = X86_VENDOR_INTEL,
>> };
>> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
>> index d8cc5223b7ce..5e5955aa6593 100644
>> --- a/arch/x86/kernel/cpu/resctrl/core.c
>> +++ b/arch/x86/kernel/cpu/resctrl/core.c
>> @@ -22,7 +22,7 @@
>> #include <linux/cpuhotplug.h>
>>
>> #include <asm/intel-family.h>
>> -#include <asm/resctrl_sched.h>
>> +#include <asm/resctrl.h>
>> #include "internal.h"
>>
>> /* Mutex to protect rdtgroup access. */
>> @@ -958,6 +958,35 @@ static __init void rdt_init_res_defs(void)
>>
>> static enum cpuhp_state rdt_online;
>>
>> +/* Runs once on the BSP during boot. */
>> +void resctrl_cpu_detect(struct cpuinfo_x86 *c)
>> +{
>> + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
>> + c->x86_cache_max_rmid = -1;
>> + c->x86_cache_occ_scale = -1;
>> + c->x86_cache_mbm_width_offset = -1;
>> + return;
>> + }
>> +
>> + /* will be overridden if occupancy monitoring exists */
>> + c->x86_cache_max_rmid = cpuid_ebx(0xf);
>> +
>> + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
>> + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
>> + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
>> + u32 eax, ebx, ecx, edx;
>> +
>> + /* QoS sub-leaf, EAX=0Fh, ECX=1 */
>> + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
>> +
>> + c->x86_cache_max_rmid = ecx;
>> + c->x86_cache_occ_scale = ebx;
>> +
>> + if (c->x86_vendor == X86_VENDOR_INTEL)
>> + c->x86_cache_mbm_width_offset = eax & 0xff;
>> + }
>> +}
>> +
>
> resctrl_cpu_detect() is now identical among vendors. Do we still need
> the c_bsp_init helpers? Could we not perhaps call resctrl_cpu_detect()
> directly from early_identify_cpu()?
>

I should have given this more thought ... even though the function will
be identical between the two vendors it does contain vendor-specific
code, thus needing to be called from the c_bsp_init helpers? I will
resubmit the new series that intends to follow all your suggestions shortly.

Thank you very much for your feedback

Reinette