Changes from V3 (https://lkml.org/lkml/2017/7/24/2)
* Use a cleaner version of refactor patch 1/2 from Boris instead.
Changes from V2 (https://lkml.org/lkml/2017/7/21/730)
* In patch 1/2
* Fix kbuild error for the __get_topoext() due to #ifdef CONFIG_SMP.
* Do not move node_id declaration in the refactored function.
Changes from V1 (https://lkml.org/lkml/2017/7/20/180)
* Refactor topology extension logic into __get_topoext() (per Boris)
Borislav Petkov (1):
x86/amd: Refactor topology extension related code
Suravee Suthikulpanit (1):
x86/amd: Fixup cpu_core_id for family17h downcore configuration
arch/x86/kernel/cpu/amd.c | 110 ++++++++++++++++++++++++++++++++--------------
1 file changed, 77 insertions(+), 33 deletions(-)
--
2.7.4
From: Borislav Petkov <[email protected]>
Refactoring in preparation for subsequent changes.
There is no functional change.
Signed-off-by: Suravee Suthikulpanit <[email protected]>
---
arch/x86/kernel/cpu/amd.c | 78 ++++++++++++++++++++++++++---------------------
1 file changed, 44 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8..a2a52b5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -296,13 +296,55 @@ static int nearby_node(int apicid)
}
#endif
+#ifdef CONFIG_SMP
+
+/*
+ * Get topology information via X86_FEATURE_TOPOEXT.
+ */
+static void __get_topoext(struct cpuinfo_x86 *c, int cpu)
+{
+ u32 eax, ebx, ecx, edx;
+ u8 node_id;
+
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+ node_id = ecx & 0xff;
+ smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
+
+ if (c->x86 == 0x15)
+ c->cu_id = ebx & 0xff;
+
+ if (c->x86 >= 0x17) {
+ c->cpu_core_id = ebx & 0xff;
+
+ if (smp_num_siblings > 1)
+ c->x86_max_cores /= smp_num_siblings;
+ }
+
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (cpuid_edx(0x80000006)) {
+ if (c->x86 == 0x17) {
+ /*
+ * LLC is at the core complex level.
+ * Core complex id is ApicId[3].
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ } else {
+ /* LLC is at the node level. */
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ }
+ }
+}
+
/*
* Fixup core topology information for
* (1) AMD multi-node processors
* Assumption: Number of cores in each internal node is the same.
* (2) AMD processors supporting compute units
*/
-#ifdef CONFIG_SMP
static void amd_get_topology(struct cpuinfo_x86 *c)
{
u8 node_id;
@@ -310,39 +352,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- u32 eax, ebx, ecx, edx;
-
- cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-
- node_id = ecx & 0xff;
- smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
-
- if (c->x86 == 0x15)
- c->cu_id = ebx & 0xff;
-
- if (c->x86 >= 0x17) {
- c->cpu_core_id = ebx & 0xff;
-
- if (smp_num_siblings > 1)
- c->x86_max_cores /= smp_num_siblings;
- }
-
- /*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
- */
- if (cpuid_edx(0x80000006)) {
- if (c->x86 == 0x17) {
- /*
- * LLC is at the core complex level.
- * Core complex id is ApicId[3].
- */
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
- } else {
- /* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = node_id;
- }
- }
+ __get_topoext(c, cpu);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
--
2.7.4
For family17h, current cpu_core_id is directly taken from the value
CPUID_Fn8000001E_EBX[7:0] (CoreId), which is the physical ID of the
core within a die. However, on system with downcore configuration
(where not all physical cores within a die are available), this could
result in the case where cpu_core_id > (cores_per_node - 1).
Fix up the cpu_core_id by breaking down the bitfields of CoreId,
and calculate relative ID using available topology information.
Signed-off-by: Suravee Suthikulpanit <[email protected]>
---
arch/x86/kernel/cpu/amd.c | 74 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 54 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a2a52b5..b5ea28f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -305,37 +305,71 @@ static void __get_topoext(struct cpuinfo_x86 *c, int cpu)
{
u32 eax, ebx, ecx, edx;
u8 node_id;
+ u16 l3_nshared = 0;
+
+ if (cpuid_edx(0x80000006)) {
+ cpuid_count(0x8000001d, 3, &eax, &ebx, &ecx, &edx);
+ l3_nshared = ((eax >> 14) & 0xfff) + 1;
+ }
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
node_id = ecx & 0xff;
smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
- if (c->x86 == 0x15)
- c->cu_id = ebx & 0xff;
-
- if (c->x86 >= 0x17) {
- c->cpu_core_id = ebx & 0xff;
-
- if (smp_num_siblings > 1)
- c->x86_max_cores /= smp_num_siblings;
- }
+ switch (c->x86) {
+ case 0x17: {
+ u32 tmp, ccx_offset, cpu_offset;
- /*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
- */
- if (cpuid_edx(0x80000006)) {
- if (c->x86 == 0x17) {
+ /*
+ * In family 17h, the CPUID_Fn8000001E_EBX[7:0] (CoreId)
+ * is non-contiguous in downcore and non-SMT cases.
+ * Fixup the cpu_core_id to be contiguous for cores within
+ * the die.
+ */
+ tmp = ebx & 0xff;
+ if (smp_num_siblings == 1) {
/*
- * LLC is at the core complex level.
- * Core complex id is ApicId[3].
+ * CoreId bit-encoding for SMT-disabled
+ * [7:4] : die
+ * [3] : ccx
+ * [2:0] : core
*/
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ ccx_offset = ((tmp >> 3) & 1) * l3_nshared;
+ cpu_offset = tmp & 7;
} else {
- /* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = node_id;
+ /*
+ * CoreId bit-encoding for SMT-enabled
+ * [7:3] : die
+ * [2] : ccx
+ * [1:0] : core
+ */
+ ccx_offset = ((tmp >> 2) & 1) * l3_nshared /
+ smp_num_siblings;
+ cpu_offset = tmp & 3;
+ c->x86_max_cores /= smp_num_siblings;
+
}
+ c->cpu_core_id = ccx_offset + cpu_offset;
+
+ /*
+ * Family17h L3 cache (LLC) is at Core Complex (CCX).
+ * There could be multiple CCXs in a node.
+ * CCX ID is ApicId[3].
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+
+ pr_debug("Fixup coreid:%#x to cpu_core_id:%#x\n",
+ tmp, c->cpu_core_id);
+ break;
+ }
+ case 0x15:
+ c->cu_id = ebx & 0xff;
+ /* Follow through */
+ default:
+ /* LLC is default to L3, which generally per-node */
+ if (l3_nshared > 0)
+ per_cpu(cpu_llc_id, cpu) = node_id;
}
}
--
2.7.4
On Mon, Jul 24, 2017 at 04:22:45AM -0500, Suravee Suthikulpanit wrote:
> For family17h, current cpu_core_id is directly taken from the value
> CPUID_Fn8000001E_EBX[7:0] (CoreId), which is the physical ID of the
> core within a die. However, on system with downcore configuration
> (where not all physical cores within a die are available), this could
> result in the case where cpu_core_id > (cores_per_node - 1).
And that is a problem because...?
> Fix up the cpu_core_id by breaking down the bitfields of CoreId,
> and calculate relative ID using available topology information.
>
> Signed-off-by: Suravee Suthikulpanit <[email protected]>
> ---
> arch/x86/kernel/cpu/amd.c | 74 ++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 54 insertions(+), 20 deletions(-)
Btw, I have to say, it is much easier to review now.
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index a2a52b5..b5ea28f 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -305,37 +305,71 @@ static void __get_topoext(struct cpuinfo_x86 *c, int cpu)
> {
> u32 eax, ebx, ecx, edx;
> u8 node_id;
> + u16 l3_nshared = 0;
> +
> + if (cpuid_edx(0x80000006)) {
Ok, so Janakarajan did some L3 detection:
https://lkml.kernel.org/r/[email protected]
and now that you need it too, I'd like you to refactor and unify that
L3 detection code and put it in arch/x86/kernel/cpu/intel_cacheinfo.c
(yes, we will rename that file one fine day :-)) along with accessors
for other users to call. init_amd_cacheinfo() looks good to me right
now.
> + cpuid_count(0x8000001d, 3, &eax, &ebx, &ecx, &edx);
> + l3_nshared = ((eax >> 14) & 0xfff) + 1;
> + }
>
> cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
>
> node_id = ecx & 0xff;
> smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
>
> - if (c->x86 == 0x15)
> - c->cu_id = ebx & 0xff;
> -
> - if (c->x86 >= 0x17) {
> - c->cpu_core_id = ebx & 0xff;
> -
> - if (smp_num_siblings > 1)
> - c->x86_max_cores /= smp_num_siblings;
> - }
> + switch (c->x86) {
> + case 0x17: {
> + u32 tmp, ccx_offset, cpu_offset;
>
> - /*
> - * We may have multiple LLCs if L3 caches exist, so check if we
> - * have an L3 cache by looking at the L3 cache CPUID leaf.
> - */
> - if (cpuid_edx(0x80000006)) {
> - if (c->x86 == 0x17) {
> + /*
> + * In family 17h, the CPUID_Fn8000001E_EBX[7:0] (CoreId)
> + * is non-contiguous in downcore and non-SMT cases.
> + * Fixup the cpu_core_id to be contiguous for cores within
> + * the die.
Why do we need it to be contiguous? It is not contiguous on Intel too.
> + */
> + tmp = ebx & 0xff;
> + if (smp_num_siblings == 1) {
> /*
> - * LLC is at the core complex level.
> - * Core complex id is ApicId[3].
> + * CoreId bit-encoding for SMT-disabled
> + * [7:4] : die
> + * [3] : ccx
> + * [2:0] : core
> */
> - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
> + ccx_offset = ((tmp >> 3) & 1) * l3_nshared;
> + cpu_offset = tmp & 7;
> } else {
> - /* LLC is at the node level. */
> - per_cpu(cpu_llc_id, cpu) = node_id;
> + /*
> + * CoreId bit-encoding for SMT-enabled
> + * [7:3] : die
> + * [2] : ccx
> + * [1:0] : core
> + */
> + ccx_offset = ((tmp >> 2) & 1) * l3_nshared /
> + smp_num_siblings;
> + cpu_offset = tmp & 3;
> + c->x86_max_cores /= smp_num_siblings;
> +
> }
> + c->cpu_core_id = ccx_offset + cpu_offset;
> +
> + /*
> + * Family17h L3 cache (LLC) is at Core Complex (CCX).
> + * There could be multiple CCXs in a node.
> + * CCX ID is ApicId[3].
> + */
> + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
> +
> + pr_debug("Fixup coreid:%#x to cpu_core_id:%#x\n",
> + tmp, c->cpu_core_id);
> + break;
> + }
> + case 0x15:
> + c->cu_id = ebx & 0xff;
> + /* Follow through */
> + default:
> + /* LLC is default to L3, which generally per-node */
> + if (l3_nshared > 0)
> + per_cpu(cpu_llc_id, cpu) = node_id;
If this needs to be executed unconditionally, just move it out of the
switch-case.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
Boris,
On 7/24/17 18:14, Borislav Petkov wrote:
> On Mon, Jul 24, 2017 at 04:22:45AM -0500, Suravee Suthikulpanit wrote:
>> For family17h, current cpu_core_id is directly taken from the value
>> CPUID_Fn8000001E_EBX[7:0] (CoreId), which is the physical ID of the
>> core within a die. However, on system with downcore configuration
>> (where not all physical cores within a die are available), this could
>> result in the case where cpu_core_id > (cores_per_node - 1).
>
> And that is a problem because...?
Actually, this is not totally accurate. My apology. This patch is mainly fix to
incorrect core ID in /proc/cpuinfo. I'll update the commit message accordingly
for V5.
Currently, with 6-core downcore configuration (out of normally 8), /proc/cpuinfo
is currently showing "core id" as.
NODE: 0
cpu 0 core id : 0
cpu 1 core id : 1
cpu 2 core id : 2
cpu 3 core id : 4
cpu 4 core id : 5
cpu 5 core id : 0
NODE: 1
cpu 6 core id : 2
cpu 7 core id : 3
cpu 8 core id : 4
cpu 9 core id : 0
cpu 10 core id : 1
cpu 11 core id : 2
NODE: ....
This is due to the cpu_core_id fixup in amd_get_topology() below:
/* fixup multi-node processor information */
if (nodes_per_socket > 1) {
u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cus_per_node = c->x86_max_cores / nodes_per_socket;
/* core id has to be in the [0 .. cores_per_node - 1] range */
c->cpu_core_id %= cus_per_node;
}
In this case, the cpu_core_id are {0, 1, 2 ,4, 5, 6, 8 , 9, 10, 12, 13, 14,
...}. Here, the x86_max_cores is 24, the node_per_socket is 4, and cus_per_node
is 6. When apply the fix up, the cpu_core_id ended up incorrect as shown above.
This logic assumes that the cpu_core_id are contiguous across the socket.
With this patch, this fixup is not needed since the c->cpu_core_id are already
between [0, 6].
>> [...]
>>diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index a2a52b5..b5ea28f 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -305,37 +305,71 @@ static void __get_topoext(struct cpuinfo_x86 *c, int cpu)
>> {
>> u32 eax, ebx, ecx, edx;
>> u8 node_id;
>> + u16 l3_nshared = 0;
>> +
>> + if (cpuid_edx(0x80000006)) {
>
> Ok, so Janakarajan did some L3 detection:
>
> https://lkml.kernel.org/r/[email protected]
>
> and now that you need it too, I'd like you to refactor and unify that
> L3 detection code and put it in arch/x86/kernel/cpu/intel_cacheinfo.c
> (yes, we will rename that file one fine day :-)) along with accessors
> for other users to call. init_amd_cacheinfo() looks good to me right
> now.
>
Let me look into this to re-factoring for reuse here.
>> + cpuid_count(0x8000001d, 3, &eax, &ebx, &ecx, &edx);
>> + l3_nshared = ((eax >> 14) & 0xfff) + 1;
>> + }
>>
>> cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
>>
>> node_id = ecx & 0xff;
>> smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
>>
>> - if (c->x86 == 0x15)
>> - c->cu_id = ebx & 0xff;
>> -
>> - if (c->x86 >= 0x17) {
>> - c->cpu_core_id = ebx & 0xff;
>> -
>> - if (smp_num_siblings > 1)
>> - c->x86_max_cores /= smp_num_siblings;
>> - }
>> + switch (c->x86) {
>> + case 0x17: {
>> + u32 tmp, ccx_offset, cpu_offset;
>>
>> - /*
>> - * We may have multiple LLCs if L3 caches exist, so check if we
>> - * have an L3 cache by looking at the L3 cache CPUID leaf.
>> - */
>> - if (cpuid_edx(0x80000006)) {
>> - if (c->x86 == 0x17) {
>> + /*
>> + * In family 17h, the CPUID_Fn8000001E_EBX[7:0] (CoreId)
>> + * is non-contiguous in downcore and non-SMT cases.
>> + * Fixup the cpu_core_id to be contiguous for cores within
>> + * the die.
>
> Why do we need it to be contiguous? It is not contiguous on Intel too.
Please see above description.
>> + */
>> + tmp = ebx & 0xff;
>> + if (smp_num_siblings == 1) {
>> /*
>> - * LLC is at the core complex level.
>> - * Core complex id is ApicId[3].
>> + * CoreId bit-encoding for SMT-disabled
>> + * [7:4] : die
>> + * [3] : ccx
>> + * [2:0] : core
>> */
>> - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
>> + ccx_offset = ((tmp >> 3) & 1) * l3_nshared;
>> + cpu_offset = tmp & 7;
>> } else {
>> - /* LLC is at the node level. */
>> - per_cpu(cpu_llc_id, cpu) = node_id;
>> + /*
>> + * CoreId bit-encoding for SMT-enabled
>> + * [7:3] : die
>> + * [2] : ccx
>> + * [1:0] : core
>> + */
>> + ccx_offset = ((tmp >> 2) & 1) * l3_nshared /
>> + smp_num_siblings;
>> + cpu_offset = tmp & 3;
>> + c->x86_max_cores /= smp_num_siblings;
>> +
>> }
>> + c->cpu_core_id = ccx_offset + cpu_offset;
>> +
>> + /*
>> + * Family17h L3 cache (LLC) is at Core Complex (CCX).
>> + * There could be multiple CCXs in a node.
>> + * CCX ID is ApicId[3].
>> + */
>> + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
>> +
>> + pr_debug("Fixup coreid:%#x to cpu_core_id:%#x\n",
>> + tmp, c->cpu_core_id);
>> + break;
>> + }
>> + case 0x15:
>> + c->cu_id = ebx & 0xff;
>> + /* Follow through */
>> + default:
>> + /* LLC is default to L3, which generally per-node */
>> + if (l3_nshared > 0)
>> + per_cpu(cpu_llc_id, cpu) = node_id;
>
> If this needs to be executed unconditionally, just move it out of the
> switch-case.
>
Well, for now, it has to be for non-family17h, which seems odd. I am planning to
clean this up as well.
Thanks,
Suravee
On Mon, Jul 24, 2017 at 09:14:18PM +0700, Suravee Suthikulpanit wrote:
> Actually, this is not totally accurate. My apology. This patch is
> mainly fix to incorrect core ID in /proc/cpuinfo.
So you're "fixing" only some numbering thing. Because core_id doesn't
have any influence on anything. Here's on an Intel box I have here:
processor : 0 physical id : 0 core id : 0
processor : 1 physical id : 1 core id : 0
processor : 2 physical id : 2 core id : 0
processor : 3 physical id : 3 core id : 0
processor : 4 physical id : 0 core id : 8
processor : 5 physical id : 1 core id : 8
processor : 6 physical id : 2 core id : 8
processor : 7 physical id : 3 core id : 8
processor : 8 physical id : 0 core id : 2
processor : 9 physical id : 1 core id : 2
processor : 10 physical id : 2 core id : 2
processor : 11 physical id : 3 core id : 2
processor : 12 physical id : 0 core id : 10
processor : 13 physical id : 1 core id : 10
processor : 14 physical id : 2 core id : 10
processor : 15 physical id : 3 core id : 10
processor : 16 physical id : 0 core id : 1
processor : 17 physical id : 1 core id : 1
processor : 18 physical id : 2 core id : 1
processor : 19 physical id : 3 core id : 1
processor : 20 physical id : 0 core id : 9
processor : 21 physical id : 1 core id : 9
processor : 22 physical id : 2 core id : 9
processor : 23 physical id : 3 core id : 9
processor : 24 physical id : 0 core id : 3
processor : 25 physical id : 1 core id : 3
processor : 26 physical id : 2 core id : 3
processor : 27 physical id : 3 core id : 3
processor : 28 physical id : 0 core id : 11
processor : 29 physical id : 1 core id : 11
processor : 30 physical id : 2 core id : 11
processor : 31 physical id : 3 core id : 11
processor : 32 physical id : 0 core id : 0
processor : 33 physical id : 1 core id : 0
processor : 34 physical id : 2 core id : 0
processor : 35 physical id : 3 core id : 0
processor : 36 physical id : 0 core id : 8
processor : 37 physical id : 1 core id : 8
processor : 38 physical id : 2 core id : 8
processor : 39 physical id : 3 core id : 8
processor : 40 physical id : 0 core id : 2
processor : 41 physical id : 1 core id : 2
processor : 42 physical id : 2 core id : 2
processor : 43 physical id : 3 core id : 2
processor : 44 physical id : 0 core id : 10
processor : 45 physical id : 1 core id : 10
processor : 46 physical id : 2 core id : 10
processor : 47 physical id : 3 core id : 10
processor : 48 physical id : 0 core id : 1
processor : 49 physical id : 1 core id : 1
processor : 50 physical id : 2 core id : 1
processor : 51 physical id : 3 core id : 1
processor : 52 physical id : 0 core id : 9
processor : 53 physical id : 1 core id : 9
processor : 54 physical id : 2 core id : 9
processor : 55 physical id : 3 core id : 9
processor : 56 physical id : 0 core id : 3
processor : 57 physical id : 1 core id : 3
processor : 58 physical id : 2 core id : 3
processor : 59 physical id : 3 core id : 3
processor : 60 physical id : 0 core id : 11
processor : 61 physical id : 1 core id : 11
processor : 62 physical id : 2 core id : 11
processor : 63 physical id : 3 core id : 11
So those core id numbers can be anything as long as the cpumasks used by
the scheduler are correct.
> This is due to the cpu_core_id fixup in amd_get_topology() below:
>
> /* fixup multi-node processor information */
> if (nodes_per_socket > 1) {
> u32 cus_per_node;
>
> set_cpu_cap(c, X86_FEATURE_AMD_DCM);
> cus_per_node = c->x86_max_cores / nodes_per_socket;
>
> /* core id has to be in the [0 .. cores_per_node - 1] range */
> c->cpu_core_id %= cus_per_node;
> }
AFAICT, Andreas did this for MC at the time:
4a376ec3a259 ("x86: Fix CPU llc_shared_map information for AMD Magny-Cours")
but I don't think we need to care about core_ids fitting into the node
range anymore. For the above reason - topology doesn't use core ids.
So you can just as well let ->cpu_core_id be derived from the
->initial_apicid as it is being done now in amd_detect_cmp().
In order not to cause any more confusion, you can limit the above fixup
to anything below F17h so that we don't upset existing users and add a
big fat comment as to why we're doing this. But if it is only a silly
numbering thing, I don't see the need for doing that jumping through
hoops.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
Boris,
On 7/24/17 21:44, Borislav Petkov wrote:
> On Mon, Jul 24, 2017 at 09:14:18PM +0700, Suravee Suthikulpanit wrote:
>> Actually, this is not totally accurate. My apology. This patch is
>> mainly fix to incorrect core ID in /proc/cpuinfo.
>
> So you're "fixing" only some numbering thing. Because core_id doesn't
> have any influence on anything. Here's on an Intel box I have here:
>
> processor : 0 physical id : 0 core id : 0
> processor : 1 physical id : 1 core id : 0
> processor : 2 physical id : 2 core id : 0
> processor : 3 physical id : 3 core id : 0
> processor : 4 physical id : 0 core id : 8
> processor : 5 physical id : 1 core id : 8
> processor : 6 physical id : 2 core id : 8
> processor : 7 physical id : 3 core id : 8
> processor : 8 physical id : 0 core id : 2
> processor : 9 physical id : 1 core id : 2
> processor : 10 physical id : 2 core id : 2
> processor : 11 physical id : 3 core id : 2
> processor : 12 physical id : 0 core id : 10
> processor : 13 physical id : 1 core id : 10
> processor : 14 physical id : 2 core id : 10
> processor : 15 physical id : 3 core id : 10
>
> [....]
>
> So those core id numbers can be anything as long as the cpumasks used by
> the scheduler are correct.
Ok. Sure, it doesn't need be contiguous. But at least the cpu_core_id should
represent an ID that make some sense since it is used in the
arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
invalid for the downcore configuration (i.e. duplicated where it should not be),
we should at least clean this up.
>> This is due to the cpu_core_id fixup in amd_get_topology() below:
>>
>> /* fixup multi-node processor information */
>> if (nodes_per_socket > 1) {
>> u32 cus_per_node;
>>
>> set_cpu_cap(c, X86_FEATURE_AMD_DCM);
>> cus_per_node = c->x86_max_cores / nodes_per_socket;
>>
>> /* core id has to be in the [0 .. cores_per_node - 1] range */
>> c->cpu_core_id %= cus_per_node;
>> }
>
> AFAICT, Andreas did this for MC at the time:
>
> 4a376ec3a259 ("x86: Fix CPU llc_shared_map information for AMD Magny-Cours")
>
> but I don't think we need to care about core_ids fitting into the node
> range anymore. For the above reason - topology doesn't use core ids.
Agree to the point that it does not need to be fitting into the node range.
> So you can just as well let ->cpu_core_id be derived from the
> ->initial_apicid as it is being done now in amd_detect_cmp().
Actually, for family17h, this is from the CPUID_Fn8000001E_EBX[CoreId]. But I
get your point.
> In order not to cause any more confusion, you can limit the above fixup
> to anything below F17h so that we don't upset existing users and add a
> big fat comment as to why we're doing this. But if it is only a silly
> numbering thing, I don't see the need for doing that jumping through
> hoops.
>
I will update the patch to only limit the fixup to pre-family17h.
Thanks,
Suravee
On Tue, Jul 25, 2017 at 12:51:53PM +0700, Suravee Suthikulpanit wrote:
> Ok. Sure, it doesn't need be contiguous. But at least the cpu_core_id should
> represent an ID that make some sense since it is used in the
> arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
> invalid for the downcore configuration (i.e. duplicated where it should not
> be), we should at least clean this up.
Ah right, we do use it for the SMT siblings. So yes, it should be
correct for them. And I'm pretty sure the numbers we derive from the
initial APIC ID are already good enough for that.
> I will update the patch to only limit the fixup to pre-family17h.
Yeah, give that a try. Make sure to check the topology masks are
correct.
Thanks.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
Boris,
On 7/25/17 12:56, Borislav Petkov wrote:
> On Tue, Jul 25, 2017 at 12:51:53PM +0700, Suravee Suthikulpanit wrote:
>> Ok. Sure, it doesn't need be contiguous. But at least the cpu_core_id should
>> represent an ID that make some sense since it is used in the
>> arch/x86/kernel/smpboot.c: match_smt() and some other places. So, if it's
>> invalid for the downcore configuration (i.e. duplicated where it should not
>> be), we should at least clean this up.
> Ah right, we do use it for the SMT siblings. So yes, it should be
> correct for them. And I'm pretty sure the numbers we derive from the
> initial APIC ID are already good enough for that.
>
commit 08b259631b5a1d912af4832847b5642f377d9101
Author: Yazen Ghannam <[email protected]>
Date: Sun Feb 5 11:50:22 2017 +0100
x86/CPU/AMD: Fix Zen SMT topology
After:
a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology")
our SMT scheduling topology for Fam17h systems is broken, because
the ThreadId is included in the ApicId when SMT is enabled.
So, without further decoding cpu_core_id is unique for each thread
rather than the same for threads on the same core. This didn't affect
systems with SMT disabled. Make cpu_core_id be what it is defined to be.
Actually, this commit change how we derive the cpu_core_id fro family17h to use
CPUID_Fn8000001E_EBX instead of from APIC ID for family17h and later.
Thanks,
Suravee
On Tue, Jul 25, 2017 at 01:02:49PM +0700, Suravee Suthikulpanit wrote:
> Actually, this commit change how we derive the cpu_core_id fro family17h to
> use CPUID_Fn8000001E_EBX instead of from APIC ID for family17h and later.
Is the CPUID info correct also for downcored, MCM configurations? If so,
I guess you could exit early from amd_get_topology() on F17h...
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
On Mon, Jul 24, 2017 at 04:44:45PM +0200, Borislav Petkov wrote:
> On Mon, Jul 24, 2017 at 09:14:18PM +0700, Suravee Suthikulpanit wrote:
> > Actually, this is not totally accurate. My apology. This patch is
> > mainly fix to incorrect core ID in /proc/cpuinfo.
>
> So you're "fixing" only some numbering thing. Because core_id doesn't
> have any influence on anything. Here's on an Intel box I have here:
>
> processor : 0 physical id : 0 core id : 0
> processor : 1 physical id : 1 core id : 0
---8<---
> processor : 62 physical id : 2 core id : 11
> processor : 63 physical id : 3 core id : 11
>
> So those core id numbers can be anything as long as the cpumasks used by
> the scheduler are correct.
And as long as the user is able to find consistent topology
information. (Ie. via /proc/cpuinfo and/or
/sys/bus/cpu/devices/cpuX/topology/)
> > This is due to the cpu_core_id fixup in amd_get_topology() below:
> >
> > /* fixup multi-node processor information */
> > if (nodes_per_socket > 1) {
> > u32 cus_per_node;
> >
> > set_cpu_cap(c, X86_FEATURE_AMD_DCM);
> > cus_per_node = c->x86_max_cores / nodes_per_socket;
> >
> > /* core id has to be in the [0 .. cores_per_node - 1] range */
> > c->cpu_core_id %= cus_per_node;
> > }
>
> AFAICT, Andreas did this for MC at the time:
>
> 4a376ec3a259 ("x86: Fix CPU llc_shared_map information for AMD Magny-Cours")
>
> but I don't think we need to care about core_ids fitting into the node
> range anymore. For the above reason - topology doesn't use core ids.
IIRC this was the way to fix llc_shared_map as CPUID functions could
not be used to figure which cores belong to the same compute unit.
> So you can just as well let ->cpu_core_id be derived from the
> ->initial_apicid as it is being done now in amd_detect_cmp().
>
> In order not to cause any more confusion, you can limit the above fixup
> to anything below F17h so that we don't upset existing users and add a
> big fat comment as to why we're doing this. But if it is only a silly
> numbering thing, I don't see the need for doing that jumping through
> hoops.
>
> --
> Regards/Gruss,
> Boris.
Regards,
Andreas