2024-01-23 13:51:42

by Thomas Gleixner

[permalink] [raw]
Subject: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing

From: Thomas Gleixner <[email protected]>

Now that all possible APIC IDs are tracked in the topology bitmaps, it's
trivial to retrieve the real information from there.

This gets rid of the guesstimates for the maximal packages and dies per
package as the actual numbers can be determined before a single AP has been
brought up.

The number of SMT threads can now be determined correctly from the bitmaps
in all situations. Up to now a system which has SMT disabled in the BIOS
will still claim that it is SMT capable, because the lowest APIC ID bit is
reserved for that and CPUID leaf 0xb/0x1f still enumerates the SMT domain
accordingly. By calculating the bitmap weights of the SMT and the CORE
domain and setting them into relation the SMT disabled in BIOS situation
reports correctly that the system is not SMT capable.

It also handles the situation correctly when a hybrid system's boot CPU does
not have SMT as it takes the SMT capability of the APs fully into account.

Signed-off-by: Thomas Gleixner <[email protected]>



---
arch/x86/include/asm/smp.h | 3 +--
arch/x86/include/asm/topology.h | 23 ++++++++++++-----------
arch/x86/kernel/cpu/common.c | 9 ++++++---
arch/x86/kernel/cpu/debugfs.c | 2 +-
arch/x86/kernel/cpu/topology.c | 16 +++++++++++++++-
arch/x86/kernel/cpu/topology_common.c | 24 ------------------------
arch/x86/kernel/smpboot.c | 16 ----------------
arch/x86/xen/smp.c | 2 --
8 files changed, 35 insertions(+), 60 deletions(-)
---
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -8,7 +8,7 @@
#include <asm/current.h>
#include <asm/thread_info.h>

-extern int smp_num_siblings;
+extern unsigned int smp_num_siblings;

DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
@@ -109,7 +109,6 @@ void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
-void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,7 +143,18 @@ extern const struct cpumask *cpu_cluster

#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id)

-extern unsigned int __max_die_per_package;
+extern unsigned int __max_dies_per_package;
+extern unsigned int __max_logical_packages;
+
+static inline unsigned int topology_max_packages(void)
+{
+ return __max_logical_packages;
+}
+
+static inline unsigned int topology_max_die_per_package(void)
+{
+ return __max_dies_per_package;
+}

#ifdef CONFIG_SMP
#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id)
@@ -152,14 +163,6 @@ extern unsigned int __max_die_per_packag
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))

-extern unsigned int __max_logical_packages;
-#define topology_max_packages() (__max_logical_packages)
-
-static inline int topology_max_die_per_package(void)
-{
- return __max_die_per_package;
-}
-
extern int __max_smt_threads;

static inline int topology_max_smt_threads(void)
@@ -193,13 +196,11 @@ static inline bool topology_is_primary_t
}

#else /* CONFIG_SMP */
-#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int
topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; };
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -72,11 +72,14 @@
u32 elf_hwcap2 __read_mostly;

/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
+unsigned int smp_num_siblings __ro_after_init = 1;
EXPORT_SYMBOL(smp_num_siblings);

-unsigned int __max_die_per_package __read_mostly = 1;
-EXPORT_SYMBOL(__max_die_per_package);
+unsigned int __max_dies_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__max_dies_per_package);
+
+unsigned int __max_logical_packages __ro_after_init = 1;
+EXPORT_SYMBOL(__max_logical_packages);

static struct ppin_info {
int feature;
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -29,7 +29,7 @@ static int cpu_debug_show(struct seq_fil
seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg());
seq_printf(m, "max_cores: %u\n", c->x86_max_cores);
- seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package);
+ seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package);
seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings);
return 0;
}
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -349,8 +349,8 @@ void __init topology_init_possible_cpus(
{
unsigned int assigned = topo_info.nr_assigned_cpus;
unsigned int disabled = topo_info.nr_disabled_cpus;
+ unsigned int cnta, cntb, cpu, allowed = 1;
unsigned int total = assigned + disabled;
- unsigned int cpu, allowed = 1;
u32 apicid;

if (!restrict_to_up()) {
@@ -373,6 +373,20 @@ void __init topology_init_possible_cpus(
total_cpus = allowed;
set_nr_cpu_ids(allowed);

+ cnta = domain_weight(TOPO_PKG_DOMAIN);
+ cntb = domain_weight(TOPO_DIE_DOMAIN);
+ __max_logical_packages = cnta;
+ __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
+
+ pr_info("Max. logical packages: %3u\n", cnta);
+ pr_info("Max. logical dies: %3u\n", cntb);
+ pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
+
+ cnta = domain_weight(TOPO_CORE_DOMAIN);
+ cntb = domain_weight(TOPO_SMT_DOMAIN);
+ smp_num_siblings = 1U << (get_count_order(cntb) - get_count_order(cnta));
+ pr_info("Max. threads per core: %3u\n", smp_num_siblings);
+
pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
if (topo_info.nr_rejected_cpus)
pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -196,16 +196,6 @@ void cpu_parse_topology(struct cpuinfo_x
tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]);
}

- /* Bug compatible with the existing parsers */
- if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) {
- if (system_state == SYSTEM_BOOTING) {
- pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu);
- smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN];
- } else {
- pr_warn_once("CPU%d: SMT detected after init. Too late!\n", cpu);
- }
- }
-
topo_set_ids(&tscan);
topo_set_max_cores(&tscan);
}
@@ -232,20 +222,6 @@ void __init cpu_init_topology(struct cpu
topo_set_max_cores(&tscan);

/*
- * Bug compatible with the existing code. If the boot CPU does not
- * have SMT this ends up with one sibling. This needs way deeper
- * changes further down the road to get it right during early boot.
- */
- smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN];
-
- /*
- * Neither it's clear whether there are as many dies as the APIC
- * space indicating die level is. But assume that the actual number
- * of CPUs gives a proper indication for now to stay bug compatible.
- */
- __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] /
- tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1];
- /*
* AMD systems have Nodes per package which cannot be mapped to
* APIC ID.
*/
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -139,8 +139,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct
.phys_die_id = U32_MAX,
};

-unsigned int __max_logical_packages __read_mostly;
-EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
static unsigned int logical_die __read_mostly;

@@ -1267,24 +1265,10 @@ void __init native_smp_prepare_boot_cpu(
native_pv_lock_init();
}

-void __init calculate_max_logical_packages(void)
-{
- int ncpus;
-
- /*
- * Today neither Intel nor AMD support heterogeneous systems so
- * extrapolate the boot cpu's data to all packages.
- */
- ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
- __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- pr_info("Max logical packages: %u\n", __max_logical_packages);
-}
-
void __init native_smp_cpus_done(unsigned int max_cpus)
{
pr_debug("Boot done\n");

- calculate_max_logical_packages();
build_sched_topology();
nmi_selftest();
impress_friends();
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -123,8 +123,6 @@ void __init xen_smp_cpus_done(unsigned i
{
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
- else
- calculate_max_logical_packages();
}

void xen_smp_send_reschedule(int cpu)



2024-01-26 08:55:47

by Zhang, Rui

[permalink] [raw]
Subject: Re: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing


> >  
> > +       cnta = domain_weight(TOPO_PKG_DOMAIN);
> > +       cntb = domain_weight(TOPO_DIE_DOMAIN);
> > +       __max_logical_packages = cnta;
> > +       __max_dies_per_package = 1U << (get_count_order(cntb) -
> > get_count_order(cnta));
> > +
> > +       pr_info("Max. logical packages: %3u\n", cnta);
> > +       pr_info("Max. logical dies:     %3u\n", cntb);
> > +       pr_info("Max. dies per package: %3u\n",
> > __max_dies_per_package);
> > +
> > +       cnta = domain_weight(TOPO_CORE_DOMAIN);
> > +       cntb = domain_weight(TOPO_SMT_DOMAIN);
> > +       smp_num_siblings = 1U << (get_count_order(cntb) -
> > get_count_order(cnta));
> > +       pr_info("Max. threads per core: %3u\n", smp_num_siblings);
> > +

I missed this but Ashok catches it.

Say, on my Adlerlake platform, which has 4 Pcores with HT + 8 Ecores,
cnta is 12, cntb is 16, and smp_num_siblings is set to 1 erroneously.

I think we should use
smp_num_siblings = DIV_ROUND_UP(cntb, cnta);
here.
Or even check each core to get the maximum value (in case there are
more than 2 siblings in a core some day).

thanks,
rui


2024-01-26 20:23:31

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing

On Fri, Jan 26 2024 at 07:07, Zhang, Rui wrote:
>> >  
>> > +       cnta = domain_weight(TOPO_PKG_DOMAIN);
>> > +       cntb = domain_weight(TOPO_DIE_DOMAIN);
>> > +       __max_logical_packages = cnta;
>> > +       __max_dies_per_package = 1U << (get_count_order(cntb) -
>> > get_count_order(cnta));
>> > +
>> > +       pr_info("Max. logical packages: %3u\n", cnta);
>> > +       pr_info("Max. logical dies:     %3u\n", cntb);
>> > +       pr_info("Max. dies per package: %3u\n",
>> > __max_dies_per_package);
>> > +
>> > +       cnta = domain_weight(TOPO_CORE_DOMAIN);
>> > +       cntb = domain_weight(TOPO_SMT_DOMAIN);
>> > +       smp_num_siblings = 1U << (get_count_order(cntb) -
>> > get_count_order(cnta));
>> > +       pr_info("Max. threads per core: %3u\n", smp_num_siblings);
>> > +
>
> I missed this but Ashok catches it.
>
> Say, on my Adlerlake platform, which has 4 Pcores with HT + 8 Ecores,
> cnta is 12, cntb is 16, and smp_num_siblings is set to 1 erroneously.
>
> I think we should use
> smp_num_siblings = DIV_ROUND_UP(cntb, cnta);
> here.

Indeed. That's more than obvious.

> Or even check each core to get the maximum value (in case there are
> more than 2 siblings in a core some day).

We want to get rid of HT not make it worse.

2024-01-28 20:01:23

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing

On Fri, Jan 26, 2024 at 09:22:47PM +0100, Thomas Gleixner wrote:
> On Fri, Jan 26 2024 at 07:07, Zhang, Rui wrote:
> >> >  
> >> > +       cnta = domain_weight(TOPO_PKG_DOMAIN);
> >> > +       cntb = domain_weight(TOPO_DIE_DOMAIN);
> >> > +       __max_logical_packages = cnta;
> >> > +       __max_dies_per_package = 1U << (get_count_order(cntb) -
> >> > get_count_order(cnta));
> >> > +
> >> > +       pr_info("Max. logical packages: %3u\n", cnta);
> >> > +       pr_info("Max. logical dies:     %3u\n", cntb);
> >> > +       pr_info("Max. dies per package: %3u\n",
> >> > __max_dies_per_package);
> >> > +
> >> > +       cnta = domain_weight(TOPO_CORE_DOMAIN);
> >> > +       cntb = domain_weight(TOPO_SMT_DOMAIN);
> >> > +       smp_num_siblings = 1U << (get_count_order(cntb) -
> >> > get_count_order(cnta));
> >> > +       pr_info("Max. threads per core: %3u\n", smp_num_siblings);
> >> > +
> >
> > I missed this but Ashok catches it.
> >
> > Say, on my Adlerlake platform, which has 4 Pcores with HT + 8 Ecores,
> > cnta is 12, cntb is 16, and smp_num_siblings is set to 1 erroneously.
> >
> > I think we should use
> > smp_num_siblings = DIV_ROUND_UP(cntb, cnta);
> > here.
>
> Indeed. That's more than obvious.
>
> > Or even check each core to get the maximum value (in case there are
> > more than 2 siblings in a core some day).
>
> We want to get rid of HT not make it worse.

Hear, hear!!! ;-)

Thanx, Paul

2024-02-12 16:50:32

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing

On Fri, Jan 26 2024 at 21:22, Thomas Gleixner wrote:
> On Fri, Jan 26 2024 at 07:07, Zhang, Rui wrote:
>> Say, on my Adlerlake platform, which has 4 Pcores with HT + 8 Ecores,
>> cnta is 12, cntb is 16, and smp_num_siblings is set to 1 erroneously.
>>
>> I think we should use
>> smp_num_siblings = DIV_ROUND_UP(cntb, cnta);
>> here.
>
> Indeed. That's more than obvious.

I pushed out a new version which addresses this and also the fallout
Michael and Sohil reported:

git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git topo-full-v3

I let the robot chew on it before posting it in the next days.

Thanks,

tglx

2024-02-12 19:49:41

by Michael Kelley

[permalink] [raw]
Subject: RE: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing

From: Thomas Gleixner <[email protected]> Sent: Monday, February 12, 2024 8:41 AM
>
> On Fri, Jan 26 2024 at 21:22, Thomas Gleixner wrote:
> > On Fri, Jan 26 2024 at 07:07, Zhang, Rui wrote:
> >> Say, on my Adlerlake platform, which has 4 Pcores with HT + 8 Ecores,
> >> cnta is 12, cntb is 16, and smp_num_siblings is set to 1 erroneously.
> >>
> >> I think we should use
> >> smp_num_siblings = DIV_ROUND_UP(cntb, cnta);
> >> here.
> >
> > Indeed. That's more than obvious.
>
> I pushed out a new version which addresses this and also the fallout
> Michael and Sohil reported:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git topo-full-v3
>
> I let the robot chew on it before posting it in the next days.
>

I've tested the topo-full-v3 tag on a Hyper-V guest, and can confirm
that the Rejected CPU count is now correct when nr_cpus=1 is on the
kernel boot line. And as expected, these messages are now emitted
when a kdump kernel boots on other than CPU 0:

[ 0.339622] CPU topo: Boot CPU APIC ID not the first enumerated APIC ID: 2c > 0
[ 0.342993] CPU topo: Crash kernel detected. Disabling real BSP to prevent machine INIT

Earlier, I did a variety of tests on topo-full-v2, and didn't see any other
problems, but I did not repeat those tests on topo-full-v3. So overall,

Tested-by: Michael Kelley <[email protected]>

Tangentially and FWIW, you probably remember our discussion last
fall about Hyper-V, where in some circumstances the APIC IDs in the
guest ACPI tables don't match the APIC ID reported by the CPUID
instruction, resulting in Linux guest "APIC id mismatch" warnings. That
problem appears to be fixed now in the Azure public cloud, and your
full set of topo revisions no longer causes mangled scheduler domains
in those circumstances.

There may be nooks and crannies in the Azure fleet that haven't gotten
the update for whatever reason, but everything I tested is good. The
on-premises Hyper-V as part of the latest Windows 11 has *not* been
updated. I don't have access to a Windows Server instance to test, but
it's probably not updated either. Hopefully they will both be updated
in the coming months.

Michael

2024-02-13 20:23:55

by Sohil Mehta

[permalink] [raw]
Subject: Re: [patch v2 21/30] x86/cpu/topology: Use topology bitmaps for sizing

On 2/12/2024 8:40 AM, Thomas Gleixner wrote:

> I pushed out a new version which addresses this and also the fallout
> Michael and Sohil reported:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git topo-full-v3
>

I see the nr_cpus=1 issue resolved as well with the above set.

Please feel free to add:

Tested-by: Sohil Mehta <[email protected]>

Sohil