If there are shared processor LPARs, underlying Hypervisor can have more
virtual cores to handle than actual physical cores.
Starting with Power 9, a core has 2 nearly independent thread groups.
On shared processor LPARs, it helps to pack threads onto a smaller number
of cores so that the overall system performance and utilization
improve. PowerVM schedules at a core level. Hence packing to fewer
cores helps.
For example: Let's say there are two 8-core shared LPARs that are
actually sharing an 8-core shared physical pool, each running 8
threads. Then consolidating the 8 threads onto 4 cores on each LPAR
would help them perform better. This is because each of the LPARs will
get 100% of the time to run applications and there will be no switching
required by the Hypervisor.
To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.
Signed-off-by: Srikar Dronamraju <[email protected]>
---
Changelog:
v1->v2: Using static key instead of a variable.
arch/powerpc/kernel/smp.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 37c41297c9ce..498c2d51fc20 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1009,9 +1009,20 @@ static int powerpc_smt_flags(void)
*/
static int powerpc_shared_cache_flags(void)
{
+ if (static_branch_unlikely(&powerpc_asym_packing))
+ return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+
return SD_SHARE_PKG_RESOURCES;
}
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&powerpc_asym_packing))
+ return SD_ASYM_PACKING;
+
+ return 0;
+}
+
/*
* We can't just pass cpu_l2_cache_mask() directly because
* returns a non-const pointer and the compiler barfs on that.
@@ -1048,8 +1059,8 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_mc_mask, SD_INIT_NAME(MC) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
+ { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
{ NULL, },
};
@@ -1687,6 +1698,8 @@ static void __init fixup_topology(void)
if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
pr_info_once("Enabling Asymmetric SMT scheduling\n");
static_branch_enable(&powerpc_asym_packing);
+ } else if (is_shared_processor() && has_big_cores) {
+ static_branch_enable(&powerpc_asym_packing);
}
#ifdef CONFIG_SCHED_SMT
--
2.31.1
Srikar Dronamraju <[email protected]> writes:
> If there are shared processor LPARs, underlying Hypervisor can have more
> virtual cores to handle than actual physical cores.
>
> Starting with Power 9, a core has 2 nearly independent thread groups.
You need to be clearer here that you're talking about "big cores", not
SMT4 cores as seen on bare metal systems.
> On a shared processors LPARs, it helps to pack threads to lesser number
> of cores so that the overall system performance and utilization
> improves. PowerVM schedules at a core level. Hence packing to fewer
> cores helps.
>
> For example: Lets says there are two 8-core Shared LPARs that are
> actually sharing a 8 Core shared physical pool, each running 8 threads
> each. Then Consolidating 8 threads to 4 cores on each LPAR would help
> them to perform better. This is because each of the LPAR will get
> 100% time to run applications and there will no switching required by
> the Hypervisor.
>
> To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.
.. when the system is running in shared processor mode and has big cores.
cheers
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 37c41297c9ce..498c2d51fc20 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1009,9 +1009,20 @@ static int powerpc_smt_flags(void)
> */
> static int powerpc_shared_cache_flags(void)
> {
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
> +
> return SD_SHARE_PKG_RESOURCES;
> }
>
> +static int powerpc_shared_proc_flags(void)
> +{
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_ASYM_PACKING;
> +
> + return 0;
> +}
> +
> /*
> * We can't just pass cpu_l2_cache_mask() directly because
> * returns a non-const pointer and the compiler barfs on that.
> @@ -1048,8 +1059,8 @@ static struct sched_domain_topology_level powerpc_topology[] = {
> { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> #endif
> { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> - { cpu_mc_mask, SD_INIT_NAME(MC) },
> - { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> + { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
> + { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
> { NULL, },
> };
>
> @@ -1687,6 +1698,8 @@ static void __init fixup_topology(void)
> if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> pr_info_once("Enabling Asymmetric SMT scheduling\n");
> static_branch_enable(&powerpc_asym_packing);
> + } else if (is_shared_processor() && has_big_cores) {
> + static_branch_enable(&powerpc_asym_packing);
> }
>
> #ifdef CONFIG_SCHED_SMT
> --
> 2.31.1
On Thu, Oct 19, 2023 at 03:38:40PM +1100, Michael Ellerman wrote:
> Srikar Dronamraju <[email protected]> writes:
> > If there are shared processor LPARs, underlying Hypervisor can have more
> > virtual cores to handle than actual physical cores.
> >
> > Starting with Power 9, a core has 2 nearly independent thread groups.
>
> You need to be clearer here that you're talking about "big cores", not
> SMT4 cores as seen on bare metal systems.
What is a 'big core' ? I'm thinking big.LITTLE, but I didn't think Power
went that route (yet?).. help?
> > On a shared processors LPARs, it helps to pack threads to lesser number
> > of cores so that the overall system performance and utilization
> > improves. PowerVM schedules at a core level. Hence packing to fewer
> > cores helps.
> >
> > For example: Lets says there are two 8-core Shared LPARs that are
> > actually sharing a 8 Core shared physical pool, each running 8 threads
> > each. Then Consolidating 8 threads to 4 cores on each LPAR would help
> > them to perform better. This is because each of the LPAR will get
> > 100% time to run applications and there will no switching required by
> > the Hypervisor.
> >
> > To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.
>
> .. when the system is running in shared processor mode and has big cores.
>
> cheers
>
> > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> > index 37c41297c9ce..498c2d51fc20 100644
> > --- a/arch/powerpc/kernel/smp.c
> > +++ b/arch/powerpc/kernel/smp.c
> > @@ -1009,9 +1009,20 @@ static int powerpc_smt_flags(void)
> > */
> > static int powerpc_shared_cache_flags(void)
> > {
> > + if (static_branch_unlikely(&powerpc_asym_packing))
> > + return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
> > +
> > return SD_SHARE_PKG_RESOURCES;
> > }
> >
> > +static int powerpc_shared_proc_flags(void)
> > +{
> > + if (static_branch_unlikely(&powerpc_asym_packing))
> > + return SD_ASYM_PACKING;
> > +
> > + return 0;
> > +}
Can you leave the future reader a clue in the form of a comment around
here perhaps? Explaining *why* things are as they are etc..
> > +
> > /*
> > * We can't just pass cpu_l2_cache_mask() directly because
> > * returns a non-const pointer and the compiler barfs on that.
> > @@ -1048,8 +1059,8 @@ static struct sched_domain_topology_level powerpc_topology[] = {
> > { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> > #endif
> > { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> > - { cpu_mc_mask, SD_INIT_NAME(MC) },
> > - { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> > + { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
> > + { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
> > { NULL, },
> > };
> >
> > @@ -1687,6 +1698,8 @@ static void __init fixup_topology(void)
> > if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> > pr_info_once("Enabling Asymmetric SMT scheduling\n");
> > static_branch_enable(&powerpc_asym_packing);
> > + } else if (is_shared_processor() && has_big_cores) {
> > + static_branch_enable(&powerpc_asym_packing);
> > }
> >
> > #ifdef CONFIG_SCHED_SMT
> > --
> > 2.31.1
Peter Zijlstra <[email protected]> writes:
> On Thu, Oct 19, 2023 at 03:38:40PM +1100, Michael Ellerman wrote:
>> Srikar Dronamraju <[email protected]> writes:
>> > If there are shared processor LPARs, underlying Hypervisor can have more
>> > virtual cores to handle than actual physical cores.
>> >
>> > Starting with Power 9, a core has 2 nearly independent thread groups.
>>
>> You need to be clearer here that you're talking about "big cores", not
>> SMT4 cores as seen on bare metal systems.
>
> What is a 'big core' ? I'm thinking big.LITTLE, but I didn't think Power
> went that route (yet?).. help?
No it's not big.LITTLE :)
It means we have two SMT4 cores glued together that behave as a single
SMT8 core. A system is either in "big core" mode or it's not; it's never
heterogeneous.
If you grep for "big_core" there's some code in the kernel for dealing
with it, though it's probably not very illuminating.
Possibly we should switch to using the "thread group" terminology, so
that it doesn't confuse folks about big.LITTLE. Also the device tree
property that we use to discover if we're using big cores is called
ibm,thread-groups.
cheers
* Peter Zijlstra <[email protected]> [2023-10-19 09:48:28]:
> On Thu, Oct 19, 2023 at 03:38:40PM +1100, Michael Ellerman wrote:
> > Srikar Dronamraju <[email protected]> writes:
> > > If there are shared processor LPARs, underlying Hypervisor can have more
> > > virtual cores to handle than actual physical cores.
> > >
> > > Starting with Power 9, a core has 2 nearly independent thread groups.
> >
> > You need to be clearer here that you're talking about "big cores", not
> > SMT4 cores as seen on bare metal systems.
>
> What is a 'big core' ? I'm thinking big.LITTLE, but I didn't think Power
> went that route (yet?).. help?
>
Each independent thread group acts as an SMT4 core, or small core. A set of
2 thread groups forms an SMT8 core, aka big core. PowerVM (aka pHyp)
schedules at big core granularity.
So if we have 2 LPARs, each spanning 2 big cores (i.e. 16 CPUs), and if at
some point each LPAR has only 2 threads to run, we are exploring whether we
can run both threads on just one big core, so that pHyp can schedule both
LPARs at the same time and avoid having to switch/multiplex between these
two LPARs.
--
Thanks and Regards
Srikar Dronamraju
On 10/18/23 10:07 PM, Srikar Dronamraju wrote:
> If there are shared processor LPARs, underlying Hypervisor can have more
> virtual cores to handle than actual physical cores.
>
> Starting with Power 9, a core has 2 nearly independent thread groups.
> On a shared processors LPARs, it helps to pack threads to lesser number
> of cores so that the overall system performance and utilization
> improves. PowerVM schedules at a core level. Hence packing to fewer
> cores helps.
>
> For example: Lets says there are two 8-core Shared LPARs that are
> actually sharing a 8 Core shared physical pool, each running 8 threads
> each. Then Consolidating 8 threads to 4 cores on each LPAR would help
> them to perform better. This is because each of the LPAR will get
> 100% time to run applications and there will no switching required by
> the Hypervisor.
>
> To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.
This would conflict with tip/master.
DIE has been renamed to PKG there, so both the changelog and the code below
should change DIE to PKG.
>
> Signed-off-by: Srikar Dronamraju <[email protected]>
> ---
> Changelog:
> v1->v2: Using static key instead of a variable.
>
> arch/powerpc/kernel/smp.c | 17 +++++++++++++++--
> 1 file changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 37c41297c9ce..498c2d51fc20 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1009,9 +1009,20 @@ static int powerpc_smt_flags(void)
> */
> static int powerpc_shared_cache_flags(void)
> {
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
> +
> return SD_SHARE_PKG_RESOURCES;
> }
>
> +static int powerpc_shared_proc_flags(void)
> +{
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_ASYM_PACKING;
> +
> + return 0;
> +}
> +
> /*
> * We can't just pass cpu_l2_cache_mask() directly because
> * returns a non-const pointer and the compiler barfs on that.
> @@ -1048,8 +1059,8 @@ static struct sched_domain_topology_level powerpc_topology[] = {
> { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> #endif
> { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> - { cpu_mc_mask, SD_INIT_NAME(MC) },
> - { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> + { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
> + { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
> { NULL, },
> };
>
> @@ -1687,6 +1698,8 @@ static void __init fixup_topology(void)
> if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> pr_info_once("Enabling Asymmetric SMT scheduling\n");
> static_branch_enable(&powerpc_asym_packing);
> + } else if (is_shared_processor() && has_big_cores) {
> + static_branch_enable(&powerpc_asym_packing);
> }
>
> #ifdef CONFIG_SCHED_SMT
* Shrikanth Hegde <[email protected]> [2023-10-19 21:26:56]:
>
>
> On 10/18/23 10:07 PM, Srikar Dronamraju wrote:
> > If there are shared processor LPARs, underlying Hypervisor can have more
> > virtual cores to handle than actual physical cores.
> >
> > Starting with Power 9, a core has 2 nearly independent thread groups.
> > On a shared processors LPARs, it helps to pack threads to lesser number
> > of cores so that the overall system performance and utilization
> > improves. PowerVM schedules at a core level. Hence packing to fewer
> > cores helps.
> >
> > For example: Lets says there are two 8-core Shared LPARs that are
> > actually sharing a 8 Core shared physical pool, each running 8 threads
> > each. Then Consolidating 8 threads to 4 cores on each LPAR would help
> > them to perform better. This is because each of the LPAR will get
> > 100% time to run applications and there will no switching required by
> > the Hypervisor.
> >
> > To achieve this, enable SD_ASYM_PACKING flag at CACHE, MC and DIE level.
>
> This would have a conflict with tip/master.
> DIE has been renamed to PKG and Both changelog and code below should
> change DIE to PKG.
Once the changes are part of powerpc/merge, I will rebase and accommodate
the change from DIE to PKG.
--
Thanks and Regards
Srikar Dronamraju