From: Thomas Gleixner <[email protected]>
detect_extended_topology() along with it's early() variant is a classic
example for duct tape engineering:
- It evaluates an array of subleafs with a boatload of local variables
for the relevant topology levels instead of using an array to save the
enumerated information and propagate it to the right level
- It has no boundary checks for subleafs
- It prevents updating the die_id with a crude workaround instead of
checking for leaf 0xb which does not provide die information.
- It's broken vs. the number of dies evaluation as it uses:
num_processors[DIE_LEVEL] / num_processors[CORE_LEVEL]
which "works" only correctly if there is none of the intermediate
topology levels (MODULE/TILE) enumerated.
There is zero value in trying to "fix" that code as the only proper fix is
to rewrite it from scratch.
Implement a sane parser with proper code documentation, which will be used
for the consolidated topology evaluation in the next step.
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Juergen Gross <[email protected]>
Tested-by: Sohil Mehta <[email protected]>
Tested-by: Michael Kelley <[email protected]>
---
arch/x86/kernel/cpu/Makefile | 2
arch/x86/kernel/cpu/topology.h | 12 +++
arch/x86/kernel/cpu/topology_ext.c | 130 +++++++++++++++++++++++++++++++++++++
3 files changed, 143 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/cpu/topology_ext.c
---
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n
KCSAN_SANITIZE_common.o := n
obj-y := cacheinfo.o scattered.o
-obj-y += topology_common.o topology.o
+obj-y += topology_common.o topology_ext.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
--- a/arch/x86/kernel/cpu/topology.h
+++ b/arch/x86/kernel/cpu/topology.h
@@ -16,6 +16,7 @@ void cpu_init_topology(struct cpuinfo_x8
void cpu_parse_topology(struct cpuinfo_x86 *c);
void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
unsigned int shift, unsigned int ncpus);
+bool cpu_parse_topology_ext(struct topo_scan *tscan);
static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
{
@@ -36,4 +37,15 @@ static inline u32 topo_domain_mask(enum
return (1U << x86_topo_system.dom_shifts[dom]) - 1;
}
+/*
+ * Update a domain level after the fact without propagating. Used to fixup
+ * broken CPUID enumerations.
+ */
+static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+ unsigned int shift, unsigned int ncpus)
+{
+ tscan->dom_shifts[dom] = shift;
+ tscan->dom_ncpus[dom] = ncpus;
+}
+
#endif /* ARCH_X86_TOPOLOGY_H */
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_ext.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/apic.h>
+#include <asm/memtype.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+enum topo_types {
+ INVALID_TYPE = 0,
+ SMT_TYPE = 1,
+ CORE_TYPE = 2,
+ MAX_TYPE_0B = 3,
+ MODULE_TYPE = 3,
+ TILE_TYPE = 4,
+ DIE_TYPE = 5,
+ DIEGRP_TYPE = 6,
+ MAX_TYPE_1F = 7,
+};
+
+/*
+ * Use a lookup table for the case that there are future types > 6 which
+ * describe an intermediate domain level which does not exist today.
+ */
+static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = {
+ [SMT_TYPE] = TOPO_SMT_DOMAIN,
+ [CORE_TYPE] = TOPO_CORE_DOMAIN,
+ [MODULE_TYPE] = TOPO_MODULE_DOMAIN,
+ [TILE_TYPE] = TOPO_TILE_DOMAIN,
+ [DIE_TYPE] = TOPO_DIE_DOMAIN,
+ [DIEGRP_TYPE] = TOPO_DIEGRP_DOMAIN,
+};
+
+static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
+ unsigned int *last_dom)
+{
+ unsigned int dom, maxtype;
+ const unsigned int *map;
+ struct {
+ // eax
+ u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
+ // for the topology ID at the next level
+ : 27; // Reserved
+ // ebx
+ u32 num_processors : 16, // Number of processors at current level
+ : 16; // Reserved
+ // ecx
+ u32 level : 8, // Current topology level. Same as sub leaf number
+ type : 8, // Level type. If 0, invalid
+ : 16; // Reserved
+ // edx
+ u32 x2apic_id : 32; // X2APIC ID of the current logical processor
+ } sl;
+
+ switch (leaf) {
+ case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break;
+ case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break;
+ default: return false;
+ }
+
+ cpuid_subleaf(leaf, subleaf, &sl);
+
+ if (!sl.num_processors || sl.type == INVALID_TYPE)
+ return false;
+
+ if (sl.type >= maxtype) {
+ pr_err_once("Topology: leaf 0x%x:%d Unknown domain type %u\n",
+ leaf, subleaf, sl.type);
+ /*
+ * It really would have been too obvious to make the domain
+ * type space sparse and leave a few reserved types between
+ * the points which might change instead of following the
+ * usual "this can be fixed in software" principle.
+ */
+ dom = *last_dom + 1;
+ } else {
+ dom = map[sl.type];
+ *last_dom = dom;
+ }
+
+ if (!dom) {
+ tscan->c->topo.initial_apicid = sl.x2apic_id;
+ } else if (tscan->c->topo.initial_apicid != sl.x2apic_id) {
+ pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf %d APIC ID mismatch %x != %x\n",
+ leaf, subleaf, tscan->c->topo.initial_apicid, sl.x2apic_id);
+ }
+
+ topology_set_dom(tscan, dom, sl.x2apic_shift, sl.num_processors);
+ return true;
+}
+
+static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf)
+{
+ unsigned int last_dom;
+ u32 subleaf;
+
+ /* Read all available subleafs and populate the levels */
+ for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++);
+
+ /* If subleaf 0 failed to parse, give up */
+ if (!subleaf)
+ return false;
+
+ /*
+ * There are machines in the wild which have shift 0 in the subleaf
+ * 0, but advertise 2 logical processors at that level. They are
+ * truly SMT.
+ */
+ if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
+ unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+
+ pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
+ leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+ }
+
+ set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY);
+ return true;
+}
+
+bool cpu_parse_topology_ext(struct topo_scan *tscan)
+{
+ /* Intel: Try leaf 0x1F first. */
+ if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f))
+ return true;
+
+ /* Intel/AMD: Fall back to leaf 0xB if available */
+ return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b);
+}
On Tue, Jan 23, 2024 at 01:53:39PM +0100, Thomas Gleixner wrote:
> +static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
"parse_topo_subleaf"?
With a verb in the name...
> + unsigned int *last_dom)
> +{
> + unsigned int dom, maxtype;
> + const unsigned int *map;
> + struct {
> + // eax
Can we please not use those yucky // comments together with the
multiline ones?
> + u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
> + // for the topology ID at the next level
> + : 27; // Reserved
> + // ebx
> + u32 num_processors : 16, // Number of processors at current level
> + : 16; // Reserved
> + // ecx
> + u32 level : 8, // Current topology level. Same as sub leaf number
> + type : 8, // Level type. If 0, invalid
> + : 16; // Reserved
> + // edx
> + u32 x2apic_id : 32; // X2APIC ID of the current logical processor
> + } sl;
..
> +static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf)
> +{
> + unsigned int last_dom;
> + u32 subleaf;
> +
> + /* Read all available subleafs and populate the levels */
> + for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++);
> +
> + /* If subleaf 0 failed to parse, give up */
> + if (!subleaf)
> + return false;
> +
> + /*
> + * There are machines in the wild which have shift 0 in the subleaf
> + * 0, but advertise 2 logical processors at that level. They are
> + * truly SMT.
> + */
> + if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
> + unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
> +
> + pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
> + leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
Do you really wanna warn about that? Hoping that someone would do
something about it while there's time...?
> + topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
> + }
> +
> + set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY);
> + return true;
> +}
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Jan 30 2024 at 20:31, Borislav Petkov wrote:
> On Tue, Jan 23, 2024 at 01:53:39PM +0100, Thomas Gleixner wrote:
>> +static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
>
> "parse_topo_subleaf"?
>
> With a verb in the name...
>
>> + unsigned int *last_dom)
>> +{
>> + unsigned int dom, maxtype;
>> + const unsigned int *map;
>> + struct {
>> + // eax
>
> Can we please not use those yucky // comments together with the
> multiline ones?
TBH, the // comment style is really better for struct definitions. It's
denser and easier to parse.
// eax
u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
// for the topology ID at the next level
: 27; // Reserved
// ebx
u32 num_processors : 16, // Number of processors at current level
: 16; // Reserved
versus:
/* eax */
u32 x2apic_shift : 5, /*
* Number of bits to shift APIC ID right
* for the topology ID at the next level
*/
: 27; /* Reserved */
/* ebx */
u32 num_processors : 16, /* Number of processors at current level */
: 16; /* Reserved */
Especially x2apic_shift is horrible and the comments of EBX are visually
impaired while with the C++ comments x2apic_shift looks natural and the
EBX comments are just open to the right and therefore simpler.
>> + if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
>> + unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
>> +
>> + pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
>> + leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
>
> Do you really wanna warn about that? Hoping that someone would do
> something about it while there's time...?
If it's caught in early testing, this should be fixed, no?
Thanks,
tglx
On Mon, Feb 12, 2024 at 03:17:45PM +0100, Thomas Gleixner wrote:
> Especially x2apic_shift is horrible and the comments of EBX are visually
> impaired while with the C++ comments x2apic_shift looks natural and the
> EBX comments are just open to the right and therefore simpler.
I'd say, put comments *above* the member versus on the side. We don't
like side comments, if you remember. :-)
And, for example, the commenting in arch/x86/include/asm/fpu/types.h is
not half as bad and works real nice for struct definitions, I'd say.
But if you want to make that into a rule to have C++, side comments for
struct members I guess I'll get accustomed to it eventually.
> If it's caught in early testing, this should be fixed, no?
Hope dies last. :)
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Feb 12, 2024 at 04:03:41PM +0100, Thomas Gleixner wrote:
> Aside of that it would make the struct generator in the CPUID data base
> more complex.
Hmm, that's a valid point... that thing being XML is already complex.
:-P
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Feb 12 2024 at 16:00, Borislav Petkov wrote:
> On Mon, Feb 12, 2024 at 03:17:45PM +0100, Thomas Gleixner wrote:
>> Especially x2apic_shift is horrible and the comments of EBX are visually
>> impaired while with the C++ comments x2apic_shift looks natural and the
>> EBX comments are just open to the right and therefore simpler.
>
> I'd say, put comments *above* the member versus on the side. We don't
> like side comments, if you remember. :-)
In code, no. For struct definitions if they are strictly tabular
formatted, they are actually nice as they are more compact and take less
space than the above member variant.
// eax
u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
// for the topology ID at the next level
: 27; // Reserved
// ebx
u32 num_processors : 16, // Number of processors at current level
: 16; // Reserved
versus:
/* eax */
/*
* Number of bits to shift APIC ID right for the topology ID
* at the next level
*/
u32 x2apic_shift : 5,
/* Reserved */
: 27;
/* ebx */
/* Number of processors at current level */
u32 num_processors : 16,
/* Reserved */
: 16;
This really makes my eyes bleed.
On Mon, Feb 12 2024 at 15:17, Thomas Gleixner wrote:
> On Tue, Jan 30 2024 at 20:31, Borislav Petkov wrote:
> TBH, the // comment style is really better for struct definitions. It's
> denser and easier to parse.
>
> // eax
> u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
> // for the topology ID at the next level
> : 27; // Reserved
> // ebx
> u32 num_processors : 16, // Number of processors at current level
> : 16; // Reserved
>
> versus:
>
> /* eax */
> u32 x2apic_shift : 5, /*
> * Number of bits to shift APIC ID right
> * for the topology ID at the next level
> */
> : 27; /* Reserved */
>
> /* ebx */
> u32 num_processors : 16, /* Number of processors at current level */
> : 16; /* Reserved */
>
> Especially x2apic_shift is horrible and the comments of EBX are visually
> impaired while with the C++ comments x2apic_shift looks natural and the
> EBX comments are just open to the right and therefore simpler.
Aside of that it would make the struct generator in the CPUID data base
more complex.
On Mon, Feb 12, 2024 at 04:08:31PM +0100, Thomas Gleixner wrote:
> This really makes my eyes bleed.
From: Borislav Petkov (AMD) <[email protected]>
Date: Mon Feb 12 16:41:42 2024 +0100
Documentation/maintainer-tip: Add C++ tail comments exception
Document when C++-style, tail comments should be used.
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index 799359231b7f..497bb39727c8 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -480,7 +480,7 @@ Multi-line comments::
* Larger multi-line comments should be split into paragraphs.
*/
-No tail comments:
+No tail comments (see below):
Please refrain from using tail comments. Tail comments disturb the
reading flow in almost all contexts, but especially in code::
@@ -501,6 +501,34 @@ No tail comments:
/* This magic initialization needs a comment. Maybe not? */
seed = MAGIC_CONSTANT;
+ Use C++ style, tail comments when documenting structs in headers to
+ achieve a more compact layout and better readability::
+
+ // eax
+ u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
+ // for the topology ID at the next level
+ : 27; // Reserved
+ // ebx
+ u32 num_processors : 16, // Number of processors at current level
+ : 16; // Reserved
+
+ versus::
+
+ /* eax */
+ /*
+ * Number of bits to shift APIC ID right for the topology ID
+ * at the next level
+ */
+ u32 x2apic_shift : 5,
+ /* Reserved */
+ : 27;
+
+ /* ebx */
+ /* Number of processors at current level */
+ u32 num_processors : 16,
+ /* Reserved */
+ : 16;
+
Comment the important things:
Comments should be added where the operation is not obvious. Documenting
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Feb 12 2024 at 16:43, Borislav Petkov wrote:
> On Mon, Feb 12, 2024 at 04:08:31PM +0100, Thomas Gleixner wrote:
>> This really makes my eyes bleed.
>
> From: Borislav Petkov (AMD) <[email protected]>
> Date: Mon Feb 12 16:41:42 2024 +0100
>
> Documentation/maintainer-tip: Add C++ tail comments exception
>
> Document when C++-style, tail comments should be used.
>
> Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
The following commit has been merged into the x86/misc branch of tip:
Commit-ID: 7dd0a21ccb5a937ca9f798afad34de4ba030f8d4
Gitweb: https://git.kernel.org/tip/7dd0a21ccb5a937ca9f798afad34de4ba030f8d4
Author: Borislav Petkov (AMD) <[email protected]>
AuthorDate: Mon, 12 Feb 2024 16:41:42 +01:00
Committer: Borislav Petkov (AMD) <[email protected]>
CommitterDate: Tue, 13 Feb 2024 13:19:40 +01:00
Documentation/maintainer-tip: Add C++ tail comments exception
Document when C++-style, tail comments should be used.
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/20240130193102.GEZblOdor_bzoVhT0f@fat_crate.local
---
Documentation/process/maintainer-tip.rst | 30 ++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index 7993592..497bb39 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -480,7 +480,7 @@ Multi-line comments::
* Larger multi-line comments should be split into paragraphs.
*/
-No tail comments:
+No tail comments (see below):
Please refrain from using tail comments. Tail comments disturb the
reading flow in almost all contexts, but especially in code::
@@ -501,6 +501,34 @@ No tail comments:
/* This magic initialization needs a comment. Maybe not? */
seed = MAGIC_CONSTANT;
+ Use C++ style, tail comments when documenting structs in headers to
+ achieve a more compact layout and better readability::
+
+ // eax
+ u32 x2apic_shift : 5, // Number of bits to shift APIC ID right
+ // for the topology ID at the next level
+ : 27; // Reserved
+ // ebx
+ u32 num_processors : 16, // Number of processors at current level
+ : 16; // Reserved
+
+ versus::
+
+ /* eax */
+ /*
+ * Number of bits to shift APIC ID right for the topology ID
+ * at the next level
+ */
+ u32 x2apic_shift : 5,
+ /* Reserved */
+ : 27;
+
+ /* ebx */
+ /* Number of processors at current level */
+ u32 num_processors : 16,
+ /* Reserved */
+ : 16;
+
Comment the important things:
Comments should be added where the operation is not obvious. Documenting