Currently the CPU feature flag is checked whenever powerpc_smt_flags() gets
called. This is unnecessary overhead. CPU_FTR_ASYM_SMT is set based
on the processor, so all processors will either have it set or have
it unset.
Hence check the feature flag only once and cache the result for subsequent
use. This commit helps avoid a branch in powerpc_smt_flags().
Signed-off-by: Srikar Dronamraju <[email protected]>
---
Changelog:
v1->v2: Using static keys instead of a variable.
Using pr_info_once instead of printk
arch/powerpc/kernel/smp.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5826f5108a12..37c41297c9ce 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -988,18 +988,16 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
}
static bool shared_caches;
+DEFINE_STATIC_KEY_FALSE(powerpc_asym_packing);
#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ if (static_branch_unlikely(&powerpc_asym_packing))
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
- }
- return flags;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
}
#endif
@@ -1686,6 +1684,11 @@ static void __init fixup_topology(void)
{
int i;
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ pr_info_once("Enabling Asymmetric SMT scheduling\n");
+ static_branch_enable(&powerpc_asym_packing);
+ }
+
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
--
2.31.1
Srikar Dronamraju <[email protected]> writes:
> Currently cpu feature flag is checked whenever powerpc_smt_flags gets
> called. This is an unnecessary overhead. CPU_FTR_ASYM_SMT is set based
> on the processor and all processors will either have this set or will
> have it unset.
The cpu_has_feature() test is implemented with a static key.
So AFAICS this is just replacing one static key with another?
I see that you use the new static key in subsequent patches. But
couldn't those just use the existing cpu feature test?
Anyway I'd be interested to see how the generated code differs
before/after this.
cheers
> Hence only check for the feature flag once and cache it to be used
> subsequently. This commit will help avoid a branch in powerpc_smt_flags
>
> Signed-off-by: Srikar Dronamraju <[email protected]>
> ---
> Changelog:
> v1->v2: Using static keys instead of a variable.
> Using pr_info_once instead of printk
>
> arch/powerpc/kernel/smp.c | 15 +++++++++------
> 1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 5826f5108a12..37c41297c9ce 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -988,18 +988,16 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
> }
>
> static bool shared_caches;
> +DEFINE_STATIC_KEY_FALSE(powerpc_asym_packing);
>
> #ifdef CONFIG_SCHED_SMT
> /* cpumask of CPUs with asymmetric SMT dependency */
> static int powerpc_smt_flags(void)
> {
> - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
> + if (static_branch_unlikely(&powerpc_asym_packing))
> + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
>
> - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
> - flags |= SD_ASYM_PACKING;
> - }
> - return flags;
> + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
> }
> #endif
>
> @@ -1686,6 +1684,11 @@ static void __init fixup_topology(void)
> {
> int i;
>
> + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> + pr_info_once("Enabling Asymmetric SMT scheduling\n");
> + static_branch_enable(&powerpc_asym_packing);
> + }
> +
> #ifdef CONFIG_SCHED_SMT
> if (has_big_cores) {
> pr_info("Big cores detected but using small core scheduling\n");
> --
> 2.31.1
* Michael Ellerman <[email protected]> [2023-10-19 15:33:16]:
> Srikar Dronamraju <[email protected]> writes:
> > Currently cpu feature flag is checked whenever powerpc_smt_flags gets
> > called. This is an unnecessary overhead. CPU_FTR_ASYM_SMT is set based
> > on the processor and all processors will either have this set or will
> > have it unset.
>
> The cpu_has_feature() test is implemented with a static key.
>
> So AFAICS this is just replacing one static key with another?
>
> I see that you use the new static key in subsequent patches. But
> couldn't those just use the existing cpu feature test?
>
Yes, we can use the existing cpu feature test itself.
> Anyway I'd be interested to see how the generated code differs
> before/after this.
>
---------------------------->8----------------------------------------------8<------------
Before this change
0000000000000500 <powerpc_smt_flags>:
{
500: 00 00 4c 3c addis r2,r12,0
504: 00 00 42 38 addi r2,r2,0
508: a6 02 08 7c mflr r0
50c: 01 00 00 48 bl 50c <powerpc_smt_flags+0xc>
510: f8 ff e1 fb std r31,-8(r1)
514: 91 ff 21 f8 stdu r1,-112(r1)
#define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE 4
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
518: 00 00 00 60 nop
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
51c: 00 00 22 3d addis r9,r2,0
flags |= SD_ASYM_PACKING;
520: 80 05 e0 3b li r31,1408
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
524: 00 00 29 89 lbz r9,0(r9)
528: 00 00 09 2c cmpwi r9,0
52c: 28 00 82 41 beq 554 <powerpc_smt_flags+0x54>
}
530: 70 00 21 38 addi r1,r1,112
534: b4 07 e3 7f extsw r3,r31
538: f8 ff e1 eb ld r31,-8(r1)
53c: 20 00 80 4e blr
int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
540: 80 01 e0 3b li r31,384
}
544: 70 00 21 38 addi r1,r1,112
548: b4 07 e3 7f extsw r3,r31
54c: f8 ff e1 eb ld r31,-8(r1)
550: 20 00 80 4e blr
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
554: a6 02 08 7c mflr r0
558: 00 00 62 3c addis r3,r2,0
55c: 01 00 20 39 li r9,1
560: 00 00 42 3d addis r10,r2,0
564: 00 00 63 38 addi r3,r3,0
568: 00 00 2a 99 stb r9,0(r10)
56c: 80 00 01 f8 std r0,128(r1)
570: 01 00 00 48 bl 570 <powerpc_smt_flags+0x70>
574: 00 00 00 60 nop
578: 80 00 01 e8 ld r0,128(r1)
57c: a6 03 08 7c mtlr r0
580: b0 ff ff 4b b 530 <powerpc_smt_flags+0x30>
584: 00 00 00 60 nop
588: 00 00 00 60 nop
58c: 00 00 00 60 nop
After this change
0000000000000340 <powerpc_smt_flags>:
{
340: a6 02 08 7c mflr r0
344: 01 00 00 48 bl 344 <powerpc_smt_flags+0x4>
#define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE 4
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
348: 00 00 00 60 nop
return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
34c: 80 01 60 38 li r3,384
}
350: b4 07 63 7c extsw r3,r3
354: 20 00 80 4e blr
358: 00 00 00 60 nop
35c: 00 00 00 60 nop
return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
360: 80 05 60 38 li r3,1408
}
364: b4 07 63 7c extsw r3,r3
368: 20 00 80 4e blr
36c: 00 00 00 60 nop
---------------------------->8----------------------------------------------8<------------
I think most of the difference is due to moving pr_info_once to
fixup_topology. Does it make sense to move the pr_info_once to
fixup_topology (which is called less often) from powerpc_smt_flags?
Even though the pr_info_once would probably translate to load + cmp + branch,
we could avoid that for each powerpc_smt_flags call.
So something like
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5826f5108a12..bc22f775425b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -993,13 +993,10 @@ static bool shared_caches;
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT))
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
- }
- return flags;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
}
#endif
@@ -1687,6 +1684,9 @@ static void __init fixup_topology(void)
int i;
#ifdef CONFIG_SCHED_SMT
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT))
+ pr_info_once("Enabling Asymmetric SMT scheduling\n");
+
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
powerpc_topology[smt_idx].mask = smallcore_smt_mask;
--
Thanks and Regards
Srikar Dronamraju