Some architectures/platforms may need to set up cache properties very
early in boot, along with the rest of the CPU topology, so that all of
this information can be used to build the sched_domains used by the
scheduler.

Allow detect_cache_attributes() to be called quite early during boot.
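
For illustration, an early architecture init path could now call it
roughly like the sketch below (the hook name is made up for the
example; a later patch in this series wires this up for real via
init_cpu_topology()):

#include <linux/cacheinfo.h>
#include <linux/cpumask.h>
#include <linux/printk.h>

/*
 * Sketch only: run detect_cache_attributes() for every possible CPU
 * well before the secondary CPUs are brought up, so the cacheinfo is
 * available when the scheduler builds its domains. Failures are only
 * logged; the normal CPU online path can still populate the data later.
 */
static void __init early_detect_cache_attributes(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu)
                if (detect_cache_attributes(cpu))
                        pr_debug("cacheinfo: early detection failed for cpu%u\n", cpu);
}
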
Signed-off-by: Sudeep Holla <[email protected]>
---
drivers/base/cacheinfo.c | 45 ++++++++++++++++++++++++---------------
include/linux/cacheinfo.h | 1 +
2 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index ed74db18468f..976142f3e81d 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -193,14 +193,8 @@ static int cache_setup_of_node(unsigned int cpu)
{
struct device_node *np;
struct cacheinfo *this_leaf;
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
unsigned int index = 0;
- /* skip if fw_token is already populated */
- if (this_cpu_ci->info_list->fw_token) {
- return 0;
- }
-
np = of_cpu_device_node_get(cpu);
if (!np) {
pr_err("Failed to find cpu%d device node\n", cpu);
@@ -236,6 +230,18 @@ int __weak cache_setup_acpi(unsigned int cpu)
unsigned int coherency_max_size;
+static int cache_setup_properties(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (of_have_populated_dt())
+ ret = cache_setup_of_node(cpu);
+ else if (!acpi_disabled)
+ ret = cache_setup_acpi(cpu);
+
+ return ret;
+}
+
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -246,21 +252,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (this_cpu_ci->cpu_map_populated)
return 0;
- if (of_have_populated_dt())
- ret = cache_setup_of_node(cpu);
- else if (!acpi_disabled)
- ret = cache_setup_acpi(cpu);
-
- if (ret)
- return ret;
+ /*
+ * skip setting up cache properties if LLC is valid, just need
+ * to update the shared cpu_map if the cache attributes were
+ * populated early before all the cpus are brought online
+ */
+ if (!last_level_cache_is_valid(cpu)) {
+ ret = cache_setup_properties(cpu);
+ if (ret)
+ return ret;
+ }
for (index = 0; index < cache_leaves(cpu); index++) {
unsigned int i;
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
- /* skip if shared_cpu_map is already populated */
- if (!cpumask_empty(&this_leaf->shared_cpu_map))
- continue;
cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
for_each_online_cpu(i) {
@@ -330,10 +336,13 @@ int __weak populate_cache_leaves(unsigned int cpu)
return -ENOENT;
}
-static int detect_cache_attributes(unsigned int cpu)
+int detect_cache_attributes(unsigned int cpu)
{
int ret;
+ if (per_cpu_cacheinfo(cpu)) /* Already setup */
+ goto update_cpu_map;
+
if (init_cache_level(cpu) || !cache_leaves(cpu))
return -ENOENT;
@@ -349,6 +358,8 @@ static int detect_cache_attributes(unsigned int cpu)
ret = populate_cache_leaves(cpu);
if (ret)
goto free_ci;
+
+update_cpu_map:
/*
* For systems using DT for cache hierarchy, fw_token
* and shared_cpu_map will be set up here only if they are
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 7e429bc5c1a4..00b7a6ae8617 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -84,6 +84,7 @@ int populate_cache_leaves(unsigned int cpu);
int cache_setup_acpi(unsigned int cpu);
bool last_level_cache_is_valid(unsigned int cpu);
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+int detect_cache_attributes(unsigned int cpu);
#ifndef CONFIG_ACPI_PPTT
/*
* acpi_find_last_cache_level is only called on ACPI enabled
--
2.36.1
Currently ACPI populates just the minimum information about the last
level cache from PPTT in order to feed it into the building of the
sched_domains. Similar support for DT platforms is not present.

To enable the same on DT, the entire cache hierarchy information can
be built as part of CPU topology parsing, on both ACPI and DT platforms.

Note that this change builds the cacheinfo early even on ACPI systems,
but the current mechanism of building the llc_sibling mask remains
unchanged.

Signed-off-by: Sudeep Holla <[email protected]>
---
drivers/base/arch_topology.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index f73b836047cf..765723448b10 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -7,6 +7,7 @@
*/
#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
@@ -775,15 +776,23 @@ __weak int __init parse_acpi_topology(void)
#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
+ int ret, cpu;
+
reset_cpu_topology();
+ ret = parse_acpi_topology();
+ if (!ret)
+ ret = of_have_populated_dt() && parse_dt_topology();
- /*
- * Discard anything that was parsed if we hit an error so we
- * don't use partial information.
- */
- if (parse_acpi_topology())
- reset_cpu_topology();
- else if (of_have_populated_dt() && parse_dt_topology())
+ if(ret) {
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
reset_cpu_topology();
+ return;
+ }
+
+ for_each_possible_cpu(cpu)
+ detect_cache_attributes(cpu);
}
#endif
--
2.36.1
The cacheinfo is now initialised early, along with the CPU topology
initialisation. Instead of relying on the LLC ID information that is
parsed separately elsewhere, and only with ACPI PPTT, migrate to using
the same information from the cacheinfo.

This is generic for both DT and ACPI systems. The ACPI LLC ID
information parsed separately can now be removed from arch-specific
code.
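
The last_level_cache_is_valid()/last_level_cache_is_shared() helpers
used here are added by an earlier patch in this series. As a rough,
illustrative sketch only (not the exact implementation), the idea is
that two CPUs share the LLC when their last-level cacheinfo leaves
resolve to the same firmware description:

/* Illustrative sketch, not the implementation from this series. */
static struct cacheinfo *last_level_cache(unsigned int cpu)
{
        return per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
}

bool last_level_cache_is_valid(unsigned int cpu)
{
        /* Nothing detected yet, or firmware never described the caches */
        if (!per_cpu_cacheinfo(cpu) || !cache_leaves(cpu))
                return false;

        return !!last_level_cache(cpu)->fw_token;
}

bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
{
        if (!last_level_cache_is_valid(cpu_x) ||
            !last_level_cache_is_valid(cpu_y))
                return false;

        /* Same DT node / ACPI PPTT entry backing both LLC leaves */
        return last_level_cache(cpu_x)->fw_token ==
               last_level_cache(cpu_y)->fw_token;
}
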
Signed-off-by: Sudeep Holla <[email protected]>
---
drivers/base/arch_topology.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 765723448b10..4c486e4e6f2f 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -663,7 +663,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
/* not numa in package, lets use the package siblings */
core_mask = &cpu_topology[cpu].core_sibling;
}
- if (cpu_topology[cpu].llc_id != -1) {
+
+ if (last_level_cache_is_valid(cpu)) {
if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
core_mask = &cpu_topology[cpu].llc_sibling;
}
@@ -694,7 +695,7 @@ void update_siblings_masks(unsigned int cpuid)
for_each_online_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
- if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
+ if (last_level_cache_is_shared(cpu, cpuid)) {
cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
}
--
2.36.1
Hi Sudeep,
On 5/25/22 4:14 PM, Sudeep Holla wrote:
> Some architectures/platforms may need to set up cache properties very
> early in boot, along with the rest of the CPU topology, so that all of
> this information can be used to build the sched_domains used by the
> scheduler.
>
> Allow detect_cache_attributes() to be called quite early during boot.
>
> Signed-off-by: Sudeep Holla <[email protected]>
> ---
> drivers/base/cacheinfo.c | 45 ++++++++++++++++++++++++---------------
> include/linux/cacheinfo.h | 1 +
> 2 files changed, 29 insertions(+), 17 deletions(-)
>
With the comments improved as suggested below:
Reviewed-by: Gavin Shan <[email protected]>
> diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
> index ed74db18468f..976142f3e81d 100644
> --- a/drivers/base/cacheinfo.c
> +++ b/drivers/base/cacheinfo.c
> @@ -193,14 +193,8 @@ static int cache_setup_of_node(unsigned int cpu)
> {
> struct device_node *np;
> struct cacheinfo *this_leaf;
> - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> unsigned int index = 0;
>
> - /* skip if fw_token is already populated */
> - if (this_cpu_ci->info_list->fw_token) {
> - return 0;
> - }
> -
> np = of_cpu_device_node_get(cpu);
> if (!np) {
> pr_err("Failed to find cpu%d device node\n", cpu);
> @@ -236,6 +230,18 @@ int __weak cache_setup_acpi(unsigned int cpu)
>
> unsigned int coherency_max_size;
>
> +static int cache_setup_properties(unsigned int cpu)
> +{
> + int ret = 0;
> +
> + if (of_have_populated_dt())
> + ret = cache_setup_of_node(cpu);
> + else if (!acpi_disabled)
> + ret = cache_setup_acpi(cpu);
> +
> + return ret;
> +}
> +
> static int cache_shared_cpu_map_setup(unsigned int cpu)
> {
> struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> @@ -246,21 +252,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
> if (this_cpu_ci->cpu_map_populated)
> return 0;
>
> - if (of_have_populated_dt())
> - ret = cache_setup_of_node(cpu);
> - else if (!acpi_disabled)
> - ret = cache_setup_acpi(cpu);
> -
> - if (ret)
> - return ret;
> + /*
> + * skip setting up cache properties if LLC is valid, just need
> + * to update the shared cpu_map if the cache attributes were
> + * populated early before all the cpus are brought online
> + */
> + if (!last_level_cache_is_valid(cpu)) {
> + ret = cache_setup_properties(cpu);
> + if (ret)
> + return ret;
> + }
>
> for (index = 0; index < cache_leaves(cpu); index++) {
> unsigned int i;
>
> this_leaf = per_cpu_cacheinfo_idx(cpu, index);
> - /* skip if shared_cpu_map is already populated */
> - if (!cpumask_empty(&this_leaf->shared_cpu_map))
> - continue;
>
> cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
> for_each_online_cpu(i) {
> @@ -330,10 +336,13 @@ int __weak populate_cache_leaves(unsigned int cpu)
> return -ENOENT;
> }
>
> -static int detect_cache_attributes(unsigned int cpu)
> +int detect_cache_attributes(unsigned int cpu)
> {
> int ret;
>
> + if (per_cpu_cacheinfo(cpu)) /* Already setup */
> + goto update_cpu_map;
> +
> if (init_cache_level(cpu) || !cache_leaves(cpu))
> return -ENOENT;
>
Here it might be worth explaining when the CPU's cache info would
already have been populated, mentioning that it can happen both at
boot time and at CPU hot-add time.
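
For example, the check could be commented along these lines (just a
sketch of the suggested wording, not part of the posted patch):

        /*
         * The cacheinfo for this CPU may already have been allocated,
         * either early at boot via init_cpu_topology() or when this
         * CPU was hot-added earlier; in that case only the shared
         * cpu_map needs to be refreshed.
         */
        if (per_cpu_cacheinfo(cpu)) /* Already setup */
                goto update_cpu_map;
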
> @@ -349,6 +358,8 @@ static int detect_cache_attributes(unsigned int cpu)
> ret = populate_cache_leaves(cpu);
> if (ret)
> goto free_ci;
> +
> +update_cpu_map:
> /*
> * For systems using DT for cache hierarchy, fw_token
> * and shared_cpu_map will be set up here only if they are
> diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
> index 7e429bc5c1a4..00b7a6ae8617 100644
> --- a/include/linux/cacheinfo.h
> +++ b/include/linux/cacheinfo.h
> @@ -84,6 +84,7 @@ int populate_cache_leaves(unsigned int cpu);
> int cache_setup_acpi(unsigned int cpu);
> bool last_level_cache_is_valid(unsigned int cpu);
> bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
> +int detect_cache_attributes(unsigned int cpu);
> #ifndef CONFIG_ACPI_PPTT
> /*
> * acpi_find_last_cache_level is only called on ACPI enabled
>
Thanks,
Gavin
Hi Sudeep,
On 5/25/22 4:14 PM, Sudeep Holla wrote:
> Currently ACPI populates just the minimum information about the last
> level cache from PPTT in order to feed it into the building of the
> sched_domains. Similar support for DT platforms is not present.
>
> To enable the same on DT, the entire cache hierarchy information can
> be built as part of CPU topology parsing, on both ACPI and DT platforms.
>
> Note that this change builds the cacheinfo early even on ACPI systems,
> but the current mechanism of building the llc_sibling mask remains
> unchanged.
>
> Signed-off-by: Sudeep Holla <[email protected]>
> ---
> drivers/base/arch_topology.c | 23 ++++++++++++++++-------
> 1 file changed, 16 insertions(+), 7 deletions(-)
>
With the nit below fixed:
Reviewed-by: Gavin Shan <[email protected]>
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index f73b836047cf..765723448b10 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -7,6 +7,7 @@
> */
>
> #include <linux/acpi.h>
> +#include <linux/cacheinfo.h>
> #include <linux/cpu.h>
> #include <linux/cpufreq.h>
> #include <linux/device.h>
> @@ -775,15 +776,23 @@ __weak int __init parse_acpi_topology(void)
> #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
> void __init init_cpu_topology(void)
> {
> + int ret, cpu;
> +
> reset_cpu_topology();
> + ret = parse_acpi_topology();
> + if (!ret)
> + ret = of_have_populated_dt() && parse_dt_topology();
>
> - /*
> - * Discard anything that was parsed if we hit an error so we
> - * don't use partial information.
> - */
> - if (parse_acpi_topology())
> - reset_cpu_topology();
> - else if (of_have_populated_dt() && parse_dt_topology())
> + if(ret) {
> + /*
> + * Discard anything that was parsed if we hit an error so we
> + * don't use partial information.
> + */
> reset_cpu_topology();
> + return;
> + }
> +
> + for_each_possible_cpu(cpu)
> + detect_cache_attributes(cpu);
> }
> #endif
>
# ./scripts/checkpatch.pl --codespell patch/check/0006*
ERROR: space required before the open parenthesis '('
#55: FILE: drivers/base/arch_topology.c:786:
+ if(ret) {
Thanks,
Gavin
On 5/25/22 4:14 PM, Sudeep Holla wrote:
> The cacheinfo is now initialised early, along with the CPU topology
> initialisation. Instead of relying on the LLC ID information that is
> parsed separately elsewhere, and only with ACPI PPTT, migrate to using
> the same information from the cacheinfo.
>
> This is generic for both DT and ACPI systems. The ACPI LLC ID
> information parsed separately can now be removed from arch-specific
> code.
>
> Signed-off-by: Sudeep Holla <[email protected]>
> ---
> drivers/base/arch_topology.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
Reviewed-by: Gavin Shan <[email protected]>
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 765723448b10..4c486e4e6f2f 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -663,7 +663,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
> /* not numa in package, lets use the package siblings */
> core_mask = &cpu_topology[cpu].core_sibling;
> }
> - if (cpu_topology[cpu].llc_id != -1) {
> +
> + if (last_level_cache_is_valid(cpu)) {
> if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
> core_mask = &cpu_topology[cpu].llc_sibling;
> }
> @@ -694,7 +695,7 @@ void update_siblings_masks(unsigned int cpuid)
> for_each_online_cpu(cpu) {
> cpu_topo = &cpu_topology[cpu];
>
> - if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
> + if (last_level_cache_is_shared(cpu, cpuid)) {
> cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
> cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
> }
>
On 25/05/2022 10:14, Sudeep Holla wrote:
> The cacheinfo is now initialised early, along with the CPU topology
> initialisation. Instead of relying on the LLC ID information that is
> parsed separately elsewhere, and only with ACPI PPTT, migrate to using
> the same information from the cacheinfo.
>
> This is generic for both DT and ACPI systems. The ACPI LLC ID
> information parsed separately can now be removed from arch-specific
> code.
>
> Signed-off-by: Sudeep Holla <[email protected]>
> ---
> drivers/base/arch_topology.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 765723448b10..4c486e4e6f2f 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -663,7 +663,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
> /* not numa in package, lets use the package siblings */
> core_mask = &cpu_topology[cpu].core_sibling;
> }
> - if (cpu_topology[cpu].llc_id != -1) {
> +
> + if (last_level_cache_is_valid(cpu)) {
> if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
> core_mask = &cpu_topology[cpu].llc_sibling;
> }
> @@ -694,7 +695,7 @@ void update_siblings_masks(unsigned int cpuid)
> for_each_online_cpu(cpu) {
> cpu_topo = &cpu_topology[cpu];
>
> - if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
> + if (last_level_cache_is_shared(cpu, cpuid)) {
> cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
> cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
> }
I tested v3 on a Kunpeng920 (w/o CONFIG_NUMA) and it looks like
last_level_cache_is_shared() isn't working as expected.

I instrumented cpu_coregroup_mask() like this:
const struct cpumask *cpu_coregroup_mask(int cpu)
{
        const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

        if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
                core_mask = &cpu_topology[cpu].core_sibling;
                (1)
        }
        (2)
        if (last_level_cache_is_valid(cpu)) {
                if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
                        core_mask = &cpu_topology[cpu].llc_sibling;
                (3)
        }

        if (IS_ENABLED(CONFIG_SCHED_CLUSTER) &&
            cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling))
                core_mask = &cpu_topology[cpu].cluster_sibling;
        (4)
        (5)
        return core_mask;
}
and got:
(A) v3 patch-set:
[ 11.561133] (1) cpu_coregroup_mask[0]=0-47
[ 11.565670] (2) last_level_cache_is_valid(0)=1
[ 11.570587] (3) cpu_coregroup_mask[0]=0 <-- llc_sibling=0 (should be 0-23)
[ 11.574833] (4) cpu_coregroup_mask[0]=0-3 <-- Altra hack kicks in!
[ 11.579275] (5) cpu_coregroup_mask[0]=0-3
# cat /sys/kernel/debug/sched/domains/cpu0/domain*/name
CLS
DIE
# cat /proc/schedstat | awk '{print $1 " " $2 }' | grep ^[cd] | head -3
cpu0 0
domain0 00000000,00000000,0000000f
domain1 ffffffff,ffffffff,ffffffff
So the MC domain is missing.
(B) mainline as reference (cpu_coregroup_mask() slightly different):
[ 11.585008] (1) cpu_coregroup_mask[0]=0-47
[ 11.589544] (3) cpu_coregroup_mask[0]=0-23 <-- !!!
[ 11.594079] (5) cpu_coregroup_mask[0]=0-23
# cat /sys/kernel/debug/sched/domains/cpu0/domain*/name
CLS
MC <-- !!!
DIE
# cat /proc/schedstat | awk '{print $1 " " $2 }' | grep ^[cd] | head -4
cpu0 0
domain0 00000000,00000000,0000000f
domain1 00000000,00000000,00ffffff <-- !!!
domain2 ffffffff,ffffffff,ffffffff
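
One way to narrow this down might be a temporary debug dump of each
leaf's shared_cpu_map from within drivers/base/cacheinfo.c (where the
per-CPU leaves are accessible), called after the early detection; a
minimal sketch, not something that was actually run here:

/* Temporary debug helper (sketch): dump every leaf's shared_cpu_map. */
static void dump_shared_cpu_maps(void)
{
        unsigned int cpu, idx;

        for_each_possible_cpu(cpu) {
                for (idx = 0; idx < cache_leaves(cpu); idx++) {
                        struct cacheinfo *leaf = per_cpu_cacheinfo_idx(cpu, idx);

                        pr_info("cpu%u leaf%u level%u shared_cpu_map=%*pbl\n",
                                cpu, idx, leaf->level,
                                cpumask_pr_args(&leaf->shared_cpu_map));
                }
        }
}

That would show whether the secondary CPUs' LLC leaves ever get more
than their own CPU set in shared_cpu_map before cpu_coregroup_mask()
runs.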
On Thu, Jun 02, 2022 at 04:26:00PM +0200, Dietmar Eggemann wrote:
> On 25/05/2022 10:14, Sudeep Holla wrote:
> > The cacheinfo is now initialised early, along with the CPU topology
> > initialisation. Instead of relying on the LLC ID information that is
> > parsed separately elsewhere, and only with ACPI PPTT, migrate to using
> > the same information from the cacheinfo.
> >
> > This is generic for both DT and ACPI systems. The ACPI LLC ID
> > information parsed separately can now be removed from arch-specific
> > code.
> >
> > Signed-off-by: Sudeep Holla <[email protected]>
> > ---
> > drivers/base/arch_topology.c | 5 +++--
> > 1 file changed, 3 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> > index 765723448b10..4c486e4e6f2f 100644
> > --- a/drivers/base/arch_topology.c
> > +++ b/drivers/base/arch_topology.c
> > @@ -663,7 +663,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
> > /* not numa in package, lets use the package siblings */
> > core_mask = &cpu_topology[cpu].core_sibling;
> > }
> > - if (cpu_topology[cpu].llc_id != -1) {
> > +
> > + if (last_level_cache_is_valid(cpu)) {
> > if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
> > core_mask = &cpu_topology[cpu].llc_sibling;
> > }
> > @@ -694,7 +695,7 @@ void update_siblings_masks(unsigned int cpuid)
> > for_each_online_cpu(cpu) {
> > cpu_topo = &cpu_topology[cpu];
> >
> > - if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
> > + if (last_level_cache_is_shared(cpu, cpuid)) {
> > cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
> > cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
> > }
>
> I tested v3 on a Kunpeng920 (w/o CONFIG_NUMA) and it looks like
> last_level_cache_is_shared() isn't working as expected.
>
Thanks a lot for the detailed instrumentation. I am not yet able to
identify why it is not working; I will take a deeper look later.
--
Regards,
Sudeep