2023-08-07 14:39:14

by Thomas Gleixner

Subject: [patch 53/53] x86/cpu/topology: Get rid of cpuinfo::x86_max_cores

Now that __num_cores_per_package and __num_threads_per_package are
available, cpuinfo::x86_max_cores and the related math all over the place
can be replaced with the ready-to-consume data.
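
For illustration, a minimal sketch of what the conversion amounts to,
assuming the new accessors are thin wrappers around the
__num_cores_per_package and __num_threads_per_package counts set up
earlier in the series (the exact helper bodies are an assumption here,
not part of this patch):

	/* Sketch only: accessors assumed to return the precomputed counts. */
	static inline unsigned int topology_num_cores_per_package(void)
	{
		return __num_cores_per_package;
	}

	static inline unsigned int topology_num_threads_per_package(void)
	{
		return __num_threads_per_package;
	}

	/*
	 * Call sites that previously derived the thread count by hand, e.g.
	 *   boot_cpu_data.x86_max_cores * __max_threads_per_core
	 * now simply use topology_num_threads_per_package().
	 */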

Signed-off-by: Thomas Gleixner <[email protected]>
---
Documentation/arch/x86/topology.rst | 24 ++++++++---------------
arch/x86/events/intel/uncore_nhmex.c | 4 +--
arch/x86/events/intel/uncore_snb.c | 8 +++----
arch/x86/events/intel/uncore_snbep.c | 16 +++++++--------
arch/x86/include/asm/processor.h | 2 -
arch/x86/kernel/cpu/cacheinfo.c | 2 -
arch/x86/kernel/cpu/common.c | 1 -
arch/x86/kernel/cpu/debugfs.c | 3 +-
arch/x86/kernel/cpu/mce/inject.c | 3 --
arch/x86/kernel/cpu/microcode/intel.c | 4 +--
arch/x86/kernel/cpu/topology_common.c | 3 --
arch/x86/kernel/smpboot.c | 2 -
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 -
drivers/hwmon/fam15h_power.c | 2 -
14 files changed, 32 insertions(+), 44 deletions(-)

--- a/Documentation/arch/x86/topology.rst
+++ b/Documentation/arch/x86/topology.rst
@@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'.

Package-related topology information in the kernel:

- - cpuinfo_x86.x86_max_cores:
+ - topology_num_threads_per_package()

- The number of cores in a package. This information is retrieved via CPUID.
+ The number of threads in a package.

- - cpuinfo_x86.x86_max_dies:
+ - topology_num_cores_per_package()

- The number of dies in a package. This information is retrieved via CPUID.
+ The number of cores in a package.
+
+ - topology_max_dies_per_package()
+
+ The maximum number of dies in a package.

- cpuinfo_x86.topo_die_id:

- The physical ID of the die. This information is retrieved via CPUID.
+ The physical ID of the die.

- cpuinfo_x86.topo.pkg_id:

@@ -96,16 +100,6 @@ are SMT- or CMT-type threads.
AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
"core".

-Core-related topology information in the kernel:
-
- - smp_num_siblings:
-
- The number of threads in a core. The number of threads in a package can be
- calculated by::
-
- threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
-
-
Threads
=======
A thread is a single scheduling unit. It's the equivalent to a logical Linux
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void)
uncore_nhmex = true;
else
nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
- if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = nhmex_msr_uncores;
}
/* end of Nehalem-EX uncore support */
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr
void snb_uncore_cpu_init(void)
{
uncore_msr_uncores = snb_msr_uncores;
- if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ snb_uncore_cbox.num_boxes = topology_num_cores_per_package();
}

static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
@@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr
void skl_uncore_cpu_init(void)
{
uncore_msr_uncores = skl_msr_uncores;
- if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (skl_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ skl_uncore_cbox.num_boxes = topology_num_cores_per_package();
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}

--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1172,8 +1172,8 @@ static struct intel_uncore_type *snbep_m

void snbep_uncore_cpu_init(void)
{
- if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ snbep_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = snbep_msr_uncores;
}

@@ -1837,8 +1837,8 @@ static struct intel_uncore_type *ivbep_m

void ivbep_uncore_cpu_init(void)
{
- if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = ivbep_msr_uncores;
}

@@ -2909,8 +2909,8 @@ static bool hswep_has_limit_sbox(unsigne

void hswep_uncore_cpu_init(void)
{
- if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (hswep_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ hswep_uncore_cbox.num_boxes = topology_num_cores_per_package();

/* Detect 6-8 core systems with only two SBOXes */
if (hswep_has_limit_sbox(HSWEP_PCU_DID))
@@ -3272,8 +3272,8 @@ static struct event_constraint bdx_uncor

void bdx_uncore_cpu_init(void)
{
- if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
- bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package())
+ bdx_uncore_cbox.num_boxes = topology_num_cores_per_package();
uncore_msr_uncores = bdx_msr_uncores;

/* Detect systems with no SBOXes */
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -148,8 +148,6 @@ struct cpuinfo_x86 {
unsigned long loops_per_jiffy;
/* protected processor identification number */
u64 ppin;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
u16 x86_clflush_size;
/* number of cores as seen by the OS: */
u16 booted_cores;
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_
eax->split.type = types[leaf];
eax->split.level = levels[leaf];
eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
+ eax->split.num_cores_on_die = topology_num_cores_per_package();


if (assoc == 0xffff)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1698,7 +1698,6 @@ static void identify_cpu(struct cpuinfo_
c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_max_cores = 1;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -28,7 +28,8 @@ static int cpu_debug_show(struct seq_fil
seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id);
seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg());
- seq_printf(m, "max_cores: %u\n", c->x86_max_cores);
+ seq_printf(m, "num_threads: %u\n", __num_threads_per_package);
+ seq_printf(m, "num_cores: %u\n", __num_cores_per_package);
seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package);
seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core);
return 0;
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -430,10 +430,9 @@ static void trigger_thr_int(void *info)

static u32 get_nbc_for_node(int node_id)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
u32 cores_per_node;

- cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg();
+ cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg();
return cores_per_node * node_id;
}

--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -43,7 +43,7 @@ static const char ucode_path[] = "kernel
static struct microcode_intel *intel_ucode_patch;

/* last level cache size per core */
-static int llc_size_per_core;
+static unsigned long llc_size_per_core;

/*
* Returns 1 if update has been found, 0 otherwise.
@@ -777,7 +777,7 @@ static int __init calc_llc_size_per_core
{
u64 llc_size = c->x86_cache_size * 1024ULL;

- do_div(llc_size, c->x86_max_cores);
+ do_div(llc_size, topology_num_cores_per_package());

return (int)llc_size;
}
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -154,9 +154,6 @@ static void topo_set_ids(struct topo_sca
/* Relative core ID */
c->topo.core_id = topo_relative_domain_id(apicid, TOPO_CORE_DOMAIN);

- /* Maximum number of cores on this package */
- c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
-
c->topo.amd_node_id = tscan->amd_node_id;

if (c->x86_vendor == X86_VENDOR_AMD)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -566,7 +566,7 @@ static void __init build_sched_topology(
void set_cpu_sibling_map(int cpu)
{
bool has_smt = topology_smt_supported();
- bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
+ bool has_mp = has_smt || topology_num_cores_per_package() > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i, threads;
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -472,7 +472,7 @@ static int vangogh_init_smc_tables(struc

#ifdef CONFIG_X86
/* AMD x86 APU only */
- smu->cpu_core_num = boot_cpu_data.x86_max_cores;
+ smu->cpu_core_num = topology_num_cores_per_package();
#else
smu->cpu_core_num = 4;
#endif
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -209,7 +209,7 @@ static ssize_t power1_average_show(struc
* With the new x86 topology modelling, x86_max_cores is the
* compute unit number.
*/
- cu_num = boot_cpu_data.x86_max_cores;
+ cu_num = topology_num_cores_per_package();

ret = read_registers(data);
if (ret)



2023-08-11 16:56:30

by Zhang, Rui

Subject: Re: [patch 53/53] x86/cpu/topology: Get rid of cpuinfo::x86_max_cores

> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -566,7 +566,7 @@ static void __init build_sched_topology(
>  void set_cpu_sibling_map(int cpu)
>  {
>         bool has_smt = topology_smt_supported();
> -       bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
> +       bool has_mp = has_smt || topology_num_cores_per_package() > 1;

or use

	bool has_mp = topology_num_threads_per_package() > 1;

instead?
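
For context, a tiny user-space sketch (illustrative only; the counts are
made up and the kernel accessors are stand-ins) of why the two
formulations agree whenever the per-package thread and core counts are
consistent:

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		/* Stand-ins for the kernel's per-package topology counts. */
		unsigned int cores_per_package   = 4;
		unsigned int threads_per_package = 8;

		bool has_smt = threads_per_package > cores_per_package;

		/* Formulation used in the patch. */
		bool a = has_smt || cores_per_package > 1;
		/* Formulation suggested here. */
		bool b = threads_per_package > 1;

		printf("has_mp: patch=%d suggested=%d\n", a, b);
		return 0;
	}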

thanks,
rui

2023-12-14 14:01:42

by Zhang, Rui

Subject: Re: [patch 53/53] x86/cpu/topology: Get rid of cpuinfo::x86_max_cores


> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -566,7 +566,7 @@ static void __init build_sched_topology(
>  void set_cpu_sibling_map(int cpu)
>  {
>         bool has_smt = topology_smt_supported();
> -       bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
> +       bool has_mp = has_smt || topology_num_cores_per_package() > 1;

This patch series, especially the change here, indeed fixes a bug
observed on a platform with the CPUID.1F output below:

# taskset -c 0 cpuid -1 -l 0x1f -s 0
CPU:
V2 extended topology (0x1f):
x2APIC ID of logical processor = 0x0 (0)
--- level 0 ---
level number = 0x0 (0)
level type = thread (1)
bit width of level & previous levels = 0x1 (1)
number of logical processors at level = 0x1 (1)
# taskset -c 0 cpuid -1 -l 0x1f -s 1
CPU:
--- level 1 ---
level number = 0x1 (1)
level type = core (2)
bit width of level & previous levels = 0x3 (3)
number of logical processors at level = 0x1 (1)
# taskset -c 0 cpuid -1 -l 0x1f -s 2
CPU:
--- level 2 ---
level number = 0x2 (2)
level type = module (3)
bit width of level & previous levels = 0x7 (7)
number of logical processors at level = 0x18 (24)

The bug is that, on this platform, without this patch series:

1. has_smt is false because there is indeed no HT.
2. has_mp is false because:
   1. core_level_siblings, taken from EBX of the CPUID.1F Core level, is 1.
   2. the current code does not parse the Module level and uses
      core_level_siblings as the number of CPUs in a package, so
      boot_cpu_data.x86_max_cores =
          core_level_siblings / smp_num_siblings = 1.

As a result, set_cpu_sibling_map() assumes this is a UP system and does
not update any sibling maps, so the topology is completely broken.
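
To make the arithmetic concrete, here is a small user-space sketch using
the numbers from the cpuid dump above; it is illustrative only and not
the kernel's CPUID.1F parser:

	#include <stdio.h>

	int main(void)
	{
		/* EBX ("number of logical processors at level") from the dump. */
		unsigned int smt_level_ebx    = 1;	/* level 0: thread */
		unsigned int core_level_ebx   = 1;	/* level 1: core   */
		unsigned int module_level_ebx = 24;	/* level 2: module */

		unsigned int smp_num_siblings = smt_level_ebx;	/* no HT */

		/* Old code: stops at the Core level, the Module level is lost. */
		unsigned int old_x86_max_cores = core_level_ebx / smp_num_siblings;

		/* With the series: all 24 threads in the package are counted. */
		unsigned int threads_per_package = module_level_ebx;
		unsigned int cores_per_package =
			threads_per_package / smp_num_siblings;

		printf("old: x86_max_cores=%u -> has_mp=%d\n",
		       old_x86_max_cores, old_x86_max_cores > 1);	/* 1 -> 0 */
		printf("new: cores_per_package=%u -> has_mp=%d\n",
		       cores_per_package, cores_per_package > 1);	/* 24 -> 1 */
		return 0;
	}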

thanks,
rui