2020-07-23 08:54:27

by Srikar Dronamraju

Subject: [PATCH v3 09/10] powerpc/smp: Create coregroup domain

Add percpu coregroup maps and masks to create the coregroup domain.
If a coregroup doesn't exist, the coregroup domain will degenerate
in favour of the SMT/CACHE domain.

Cc: linuxppc-dev <[email protected]>
Cc: LKML <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Nicholas Piggin <[email protected]>
Cc: Anton Blanchard <[email protected]>
Cc: Oliver O'Halloran <[email protected]>
Cc: Nathan Lynch <[email protected]>
Cc: Michael Neuling <[email protected]>
Cc: Gautham R Shenoy <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Valentin Schneider <[email protected]>
Cc: Jordan Niethe <[email protected]>
Signed-off-by: Srikar Dronamraju <[email protected]>
---
Changelog v2 -> v3:
Add optimization for mask updates under coregroup_support

Changelog v1 -> v2:
Moved coregroup topology fixup to fixup_topology (Gautham)

arch/powerpc/include/asm/topology.h | 10 +++++++
arch/powerpc/kernel/smp.c | 44 +++++++++++++++++++++++++++++
arch/powerpc/mm/numa.c | 5 ++++
3 files changed, 59 insertions(+)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index f0b6300e7dd3..6609174918ab 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -88,12 +88,22 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)

#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int find_and_online_cpu_nid(int cpu);
+extern int cpu_to_coregroup_id(int cpu);
#else
static inline int find_and_online_cpu_nid(int cpu)
{
return 0;
}

+static inline int cpu_to_coregroup_id(int cpu)
+{
+#ifdef CONFIG_SMP
+ return cpu_to_core_id(cpu);
+#else
+ return 0;
+#endif
+}
+
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */

#include <asm-generic/topology.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 7d8d44cbab11..1faedde3e406 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -80,6 +80,7 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
@@ -91,6 +92,7 @@ enum {
smt_idx,
#endif
bigcore_idx,
+ mc_idx,
die_idx,
};

@@ -869,6 +871,21 @@ static const struct cpumask *smallcore_smt_mask(int cpu)
}
#endif

+static struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return per_cpu(cpu_coregroup_map, cpu);
+}
+
+static bool has_coregroup_support(void)
+{
+ return coregroup_enabled;
+}
+
+static const struct cpumask *cpu_mc_mask(int cpu)
+{
+ return cpu_coregroup_mask(cpu);
+}
+
static const struct cpumask *cpu_bigcore_mask(int cpu)
{
return per_cpu(cpu_sibling_map, cpu);
@@ -879,6 +896,7 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
{ cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) },
+ { cpu_mc_mask, SD_INIT_NAME(MC) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
@@ -925,6 +943,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ if (has_coregroup_support())
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
#ifdef CONFIG_NEED_MULTIPLE_NODES
/*
* numa_node_id() works after this.
@@ -942,6 +964,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

+ if (has_coregroup_support())
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
+
init_big_cores();
if (has_big_cores) {
cpumask_set_cpu(boot_cpuid,
@@ -1233,6 +1258,8 @@ static void remove_cpu_from_masks(int cpu)
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
if (has_big_cores)
set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
+ if (has_coregroup_support())
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
}
}
#endif
@@ -1293,6 +1320,20 @@ static void add_cpu_to_masks(int cpu)
add_cpu_to_smallcore_masks(cpu);
update_mask_by_l2(cpu, cpu_l2_cache_mask);

+ if (has_coregroup_support()) {
+ int coregroup_id = cpu_to_coregroup_id(cpu);
+
+ cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu));
+ for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) {
+ int fcpu = cpu_first_thread_sibling(i);
+
+ if (fcpu == first_thread)
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
+ else if (coregroup_id == cpu_to_coregroup_id(i))
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
+ }
+ }
+
if (pkg_id == -1) {
struct cpumask *(*mask)(int) = cpu_sibling_mask;

@@ -1384,6 +1425,9 @@ int setup_profiling_timer(unsigned int multiplier)

static void fixup_topology(void)
{
+ if (!has_coregroup_support())
+ powerpc_topology[mc_idx].mask = cpu_bigcore_mask;
+
if (shared_caches) {
pr_info("Using shared cache scheduler topology\n");
powerpc_topology[bigcore_idx].mask = shared_cache_mask;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3248160c0327..ef8aa580da21 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1216,6 +1216,11 @@ int find_and_online_cpu_nid(int cpu)
return new_nid;
}

+int cpu_to_coregroup_id(int cpu)
+{
+ return cpu_to_core_id(cpu);
+}
+
static int topology_update_init(void)
{
topology_inited = 1;
--
2.18.2


2020-07-27 04:44:26

by Gautham R Shenoy

Subject: Re: [PATCH v3 09/10] powerpc/smp: Create coregroup domain

On Thu, Jul 23, 2020 at 02:21:15PM +0530, Srikar Dronamraju wrote:
> Add percpu coregroup maps and masks to create the coregroup domain.
> If a coregroup doesn't exist, the coregroup domain will degenerate
> in favour of the SMT/CACHE domain.

[..snip..]

> @@ -879,6 +896,7 @@ static struct sched_domain_topology_level powerpc_topology[] = {
> { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> #endif
> { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) },
> + { cpu_mc_mask, SD_INIT_NAME(MC) },
> { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> { NULL, },
> };

[..snip..]

> @@ -1384,6 +1425,9 @@ int setup_profiling_timer(unsigned int multiplier)
>
> static void fixup_topology(void)
> {
> + if (!has_coregroup_support())
> + powerpc_topology[mc_idx].mask = cpu_bigcore_mask;
> +
> if (shared_caches) {
> pr_info("Using shared cache scheduler topology\n");
> powerpc_topology[bigcore_idx].mask = shared_cache_mask;


Suppose we consider a topology which does not have coregroup_support,
but has shared_caches. In that case, we would want our coregroup
domain to degenerate.

From the above code, after the fixup, our topology will look as
follows:

static struct sched_domain_topology_level powerpc_topology[] = {
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
{ cpu_bigcore_mask, SD_INIT_NAME(MC) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};

So, in this case, the core-group domain (identified by MC) will
degenerate only if cpu_bigcore_mask() and shared_cache_mask() return
the same value. This may work for existing platforms, because either
shared_caches don't exist, or when they do, cpu_bigcore_mask and
shared_cache_mask return the same set of CPUs. But this may or may not
continue to hold in the future.
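
For reference, the scheduler only collapses a parent domain whose span
is identical to its child's. Abbreviated from sd_parent_degenerate() in
kernel/sched/topology.c (the trailing flag comparisons are omitted):

static int
sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
{
	if (sd_degenerate(parent))
		return 1;

	/* The parent survives unless it spans exactly the same CPUs. */
	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
		return 0;

	/* ... followed by comparisons of sd->flags against parent->flags ... */
	return 1;
}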

Furthermore, if it is always going to be the case that, in the
presence of shared_caches, cpu_bigcore_mask() and shared_cache_mask()
are the same, then why even define two separate masks instead of
having only cpu_bigcore_mask()?

The correct way would be to set powerpc_topology[mc_idx].mask to
powerpc_topology[bigcore_idx].mask *after* we have fixed up the
big_core level.
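
Concretely, something like the following ordering would do it (just a
sketch against the masks in this patch; other shared-cache fixups are
elided):

static void fixup_topology(void)
{
	if (shared_caches) {
		pr_info("Using shared cache scheduler topology\n");
		powerpc_topology[bigcore_idx].mask = shared_cache_mask;
		/* ... any remaining shared-cache fixups ... */
	}

	/*
	 * Done after the bigcore fixup: without coregroup support, MC
	 * inherits whatever BIGCORE resolved to, so the two levels span
	 * the same CPUs and the scheduler can degenerate MC.
	 */
	if (!has_coregroup_support())
		powerpc_topology[mc_idx].mask =
			powerpc_topology[bigcore_idx].mask;
}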
--
Thanks and Regards
gautham.

2020-07-27 11:20:01

by Srikar Dronamraju

Subject: Re: [PATCH v3 09/10] powerpc/smp: Create coregroup domain

* Gautham R Shenoy <[email protected]> [2020-07-27 10:09:41]:

> >
> > static void fixup_topology(void)
> > {
> > + if (!has_coregroup_support())
> > + powerpc_topology[mc_idx].mask = cpu_bigcore_mask;
> > +
> > if (shared_caches) {
> > pr_info("Using shared cache scheduler topology\n");
> > powerpc_topology[bigcore_idx].mask = shared_cache_mask;
>
>
> Suppose we consider a topology which does not have coregroup_support,
> but has shared_caches. In that case, we would want our coregroup
> domain to degenerate.
>
> From the above code, after the fixup, our topology will look as
> follows:
>
> static struct sched_domain_topology_level powerpc_topology[] = {
> { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> { cpu_bigcore_mask, SD_INIT_NAME(MC) },
> { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> { NULL, },
>
> So, in this case, the core-group domain (identified by MC) will
> degenerate only if cpu_bigcore_mask() and shared_cache_mask() return
> the same value. This may work for existing platforms, because either
> shared_caches don't exist, or when they do, cpu_bigcore_mask and
> shared_cache_mask return the same set of CPUs. But this may or may not
> continue to hold in the future.
>
> Furthermore, if it is always going to be the case that, in the
> presence of shared_caches, cpu_bigcore_mask() and shared_cache_mask()
> are the same, then why even define two separate masks instead of
> having only cpu_bigcore_mask()?
>

Your two statements contradict each other. In the former, you say we
should be future-proof; in the latter, you ask why add a separate mask
at all if the two are always going to be the same.

> The correct way would be to set powerpc_topology[mc_idx].mask to
> powerpc_topology[bigcore_idx].mask *after* we have fixed up the
> big_core level.

The reason I modified it in v4 is not for degeneration or for a
future case, but for the current PowerNV/SMT 4 case. I could just as
well have detected the same condition and modified bigcore, but
thought doing the fixup in one place would be better.

--
Thanks and Regards
Srikar Dronamraju