Hi,
I have tried to change the {cpu,mmu}_has_feature() to use jump label two yeas
ago [1]. But that codes seem a bit ugly. This is a reimplementation by moving the
jump_label_init() much earlier so the jump label can be used in a very earlier
stage. Boot test on p4080ds, t2080rdb and powermac (qemu). This patch series
is against linux-next.
[1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2013-September/111026.html
Kevin Hao (8):
jump_label: no need to acquire the jump_label_mutex in
jump_lable_init()
jump_label: make it possible for the archs to invoke jump_label_init()
much earlier
jump_label: introduce DEFINE_STATIC_KEY_{TRUE,FALSE}_ARRAY macros
powerpc: invoke jump_label_init() in a much earlier stage
powerpc: kill mfvtb()
powerpc: move the cpu_has_feature to a separate file
powerpc: use the jump label for cpu_has_feature
powerpc: use jump label for mmu_has_feature
arch/powerpc/include/asm/cacheflush.h | 1 +
arch/powerpc/include/asm/cpufeatures.h | 34 +++++++++++++++++++++++++++++++++
arch/powerpc/include/asm/cputable.h | 16 ++++++++--------
arch/powerpc/include/asm/cputime.h | 1 +
arch/powerpc/include/asm/dbell.h | 1 +
arch/powerpc/include/asm/dcr-native.h | 1 +
arch/powerpc/include/asm/mman.h | 1 +
arch/powerpc/include/asm/mmu.h | 29 ++++++++++++++++++++++++++++
arch/powerpc/include/asm/reg.h | 9 ---------
arch/powerpc/include/asm/time.h | 3 ++-
arch/powerpc/include/asm/xor.h | 1 +
arch/powerpc/kernel/align.c | 1 +
arch/powerpc/kernel/cputable.c | 33 ++++++++++++++++++++++++++++++++
arch/powerpc/kernel/irq.c | 1 +
arch/powerpc/kernel/process.c | 1 +
arch/powerpc/kernel/setup-common.c | 1 +
arch/powerpc/kernel/setup_32.c | 5 +++++
arch/powerpc/kernel/setup_64.c | 4 ++++
arch/powerpc/kernel/smp.c | 1 +
arch/powerpc/platforms/cell/pervasive.c | 1 +
arch/powerpc/xmon/ppc-dis.c | 1 +
include/linux/jump_label.h | 6 ++++++
kernel/jump_label.c | 5 +++--
23 files changed, 137 insertions(+), 20 deletions(-)
create mode 100644 arch/powerpc/include/asm/cpufeatures.h
--
2.1.0
The jump_label_init() run in a very early stage, even before the
sched_init(). So there is no chance for concurrent access of the
jump label table.
Signed-off-by: Kevin Hao <[email protected]>
---
kernel/jump_label.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index f7dd15d537f9..df1a7fbe7cd5 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -205,7 +205,6 @@ void __init jump_label_init(void)
struct static_key *key = NULL;
struct jump_entry *iter;
- jump_label_lock();
jump_label_sort_entries(iter_start, iter_stop);
for (iter = iter_start; iter < iter_stop; iter++) {
@@ -229,7 +228,6 @@ void __init jump_label_init(void)
#endif
}
static_key_initialized = true;
- jump_label_unlock();
}
#ifdef CONFIG_MODULES
--
2.1.0
For some archs (such as powerpc) would want to invoke jump_label_init()
in a much earlier stage. So check static_key_initialized in order to
make sure this function run only once.
Signed-off-by: Kevin Hao <[email protected]>
---
kernel/jump_label.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index df1a7fbe7cd5..fcae370f8794 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -205,6 +205,9 @@ void __init jump_label_init(void)
struct static_key *key = NULL;
struct jump_entry *iter;
+ if (static_key_initialized)
+ return;
+
jump_label_sort_entries(iter_start, iter_stop);
for (iter = iter_start; iter < iter_stop; iter++) {
--
2.1.0
These are used to define a static_key_{true,false} array.
Signed-off-by: Kevin Hao <[email protected]>
---
include/linux/jump_label.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 7f653e8f6690..5c1d6a49dd6b 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -267,6 +267,12 @@ struct static_key_false {
#define DEFINE_STATIC_KEY_FALSE(name) \
struct static_key_false name = STATIC_KEY_FALSE_INIT
+#define DEFINE_STATIC_KEY_TRUE_ARRAY(name, n) \
+ struct static_key_true name[n] = { [0 ... n - 1] = STATIC_KEY_TRUE_INIT }
+
+#define DEFINE_STATIC_KEY_FALSE_ARRAY(name, n) \
+ struct static_key_false name[n] = { [0 ... n - 1] = STATIC_KEY_FALSE_INIT }
+
#ifdef HAVE_JUMP_LABEL
/*
--
2.1.0
So we can use static_key for cpu_has_feature() and mmu_has_feature().
Signed-off-by: Kevin Hao <[email protected]>
---
arch/powerpc/kernel/setup_32.c | 2 ++
arch/powerpc/kernel/setup_64.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index bb02e9f6944e..35980a2785ba 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -113,6 +113,8 @@ notrace void __init machine_init(u64 dt_ptr)
{
lockdep_init();
+ jump_label_init();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index bdcbb716f4d6..f0802a0b4a20 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -250,6 +250,8 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize lockdep early or else spinlocks will blow */
lockdep_init();
+ jump_label_init();
+
/* -------- printk is now safe to use ------- */
/* Enable early debugging if any specified (see udbg.h) */
--
2.1.0
This function is only used by get_vtb(). They are almost the same
except the reading from the real register. Move the mfspr() to
get_vtb() and kill the function mfvtb(). With this, we can eliminate
the use of cpu_has_feature() in very core header file like reg.h.
This is a preparation for the use of jump label for cpu_has_feature().
Signed-off-by: Kevin Hao <[email protected]>
---
arch/powerpc/include/asm/reg.h | 9 ---------
arch/powerpc/include/asm/time.h | 2 +-
2 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index aa1cc5f015ee..d0b5f4b63776 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1207,15 +1207,6 @@
: "r" ((unsigned long)(v)) \
: "memory")
-static inline unsigned long mfvtb (void)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- return mfspr(SPRN_VTB);
-#endif
- return 0;
-}
-
#ifdef __powerpc64__
#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
#define mftb() ({unsigned long rval; \
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 10fc784a2ad4..6f69828458fb 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -106,7 +106,7 @@ static inline u64 get_vtb(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_ARCH_207S))
- return mfvtb();
+ return mfspr(SPRN_VTB);
#endif
return 0;
}
--
2.1.0
We plan to use jump label for cpu_has_feature. In order to implement
this we need to include the linux/jump_label.h in asm/cputable.h.
But it seems that asm/cputable.h is so basic header file for ppc that
it is almost included by all the other header files. The including of
the linux/jump_label.h will introduces various recursive inclusion.
And it is very hard to fix that. So we choose to move the function
cpu_has_feature to a separate header file before using the jump label
for it. No functional change.
Signed-off-by: Kevin Hao <[email protected]>
---
arch/powerpc/include/asm/cacheflush.h | 1 +
arch/powerpc/include/asm/cpufeatures.h | 14 ++++++++++++++
arch/powerpc/include/asm/cputable.h | 8 --------
arch/powerpc/include/asm/cputime.h | 1 +
arch/powerpc/include/asm/dbell.h | 1 +
arch/powerpc/include/asm/dcr-native.h | 1 +
arch/powerpc/include/asm/mman.h | 1 +
arch/powerpc/include/asm/time.h | 1 +
arch/powerpc/include/asm/xor.h | 1 +
arch/powerpc/kernel/align.c | 1 +
arch/powerpc/kernel/irq.c | 1 +
arch/powerpc/kernel/process.c | 1 +
arch/powerpc/kernel/setup-common.c | 1 +
arch/powerpc/kernel/setup_32.c | 1 +
arch/powerpc/kernel/smp.c | 1 +
arch/powerpc/platforms/cell/pervasive.c | 1 +
arch/powerpc/xmon/ppc-dis.c | 1 +
17 files changed, 29 insertions(+), 8 deletions(-)
create mode 100644 arch/powerpc/include/asm/cpufeatures.h
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 6229e6b6037b..3bdcd9231852 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <asm/cputable.h>
+#include <asm/cpufeatures.h>
/*
* No cache flushing is required when address mappings are changed,
diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..37650db5044f
--- /dev/null
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_POWERPC_CPUFEATURES_H
+#define __ASM_POWERPC_CPUFEATURES_H
+
+#include <asm/cputable.h>
+
+static inline int cpu_has_feature(unsigned long feature)
+{
+ return (CPU_FTRS_ALWAYS & feature) ||
+ (CPU_FTRS_POSSIBLE
+ & cur_cpu_spec->cpu_features
+ & feature);
+}
+
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b118072670fb..ae4b6ef341cd 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -556,14 +556,6 @@ enum {
};
#endif /* __powerpc64__ */
-static inline int cpu_has_feature(unsigned long feature)
-{
- return (CPU_FTRS_ALWAYS & feature) ||
- (CPU_FTRS_POSSIBLE
- & cur_cpu_spec->cpu_features
- & feature);
-}
-
#define HBP_NUM 1
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index e2452550bcb1..b91837865c0e 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -28,6 +28,7 @@ static inline void setup_cputime_one_jiffy(void) { }
#include <asm/div64.h>
#include <asm/time.h>
#include <asm/param.h>
+#include <asm/cpufeatures.h>
typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 5fa6b20eba10..2d9eae338f70 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -16,6 +16,7 @@
#include <linux/threads.h>
#include <asm/ppc-opcode.h>
+#include <asm/cpufeatures.h>
#define PPC_DBELL_MSG_BRDCAST (0x04000000)
#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36))
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 4efc11dacb98..0186ba05bfe1 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -24,6 +24,7 @@
#include <linux/spinlock.h>
#include <asm/cputable.h>
+#include <asm/cpufeatures.h>
typedef struct {
unsigned int base;
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 8565c254151a..74922ad05e6c 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
#include <asm/cputable.h>
#include <linux/mm.h>
+#include <asm/cpufeatures.h>
/*
* This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 6f69828458fb..fa63005f827f 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,7 @@
#include <linux/percpu.h>
#include <asm/processor.h>
+#include <asm/cpufeatures.h>
/* time.c */
extern unsigned long tb_ticks_per_jiffy;
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index 0abb97f3be10..15ba0d07937f 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -23,6 +23,7 @@
#ifdef CONFIG_ALTIVEC
#include <asm/cputable.h>
+#include <asm/cpufeatures.h>
void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
unsigned long *v2_in);
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 86150fbb42c3..eaf1c04e51a6 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -26,6 +26,7 @@
#include <asm/emulated_ops.h>
#include <asm/switch_to.h>
#include <asm/disassemble.h>
+#include <asm/cpufeatures.h>
struct aligninfo {
unsigned char len;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 45096033d37b..60355e06ae86 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -74,6 +74,7 @@
#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
+#include <asm/cpufeatures.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75b6676c1a0b..3e35b19288fc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -57,6 +57,7 @@
#include <asm/code-patching.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#include <asm/cpufeatures.h>
/* Transactional Memory debug */
#ifdef TM_DEBUG_SW
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 44c8d03558ac..7648b532b330 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -61,6 +61,7 @@
#include <asm/cputhreads.h>
#include <mm/mmu_decl.h>
#include <asm/fadump.h>
+#include <asm/cpufeatures.h>
#ifdef DEBUG
#include <asm/udbg.h>
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 35980a2785ba..f0868f510b3b 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -38,6 +38,7 @@
#include <asm/udbg.h>
#include <asm/mmu_context.h>
#include <asm/epapr_hcalls.h>
+#include <asm/cpufeatures.h>
#define DBG(fmt...)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec2058d2d..9188823fb0e3 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -53,6 +53,7 @@
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/kexec.h>
+#include <asm/cpufeatures.h>
#ifdef DEBUG
#include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
index d17e98bc0c10..036215b8192c 100644
--- a/arch/powerpc/platforms/cell/pervasive.c
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm/reg.h>
#include <asm/cell-regs.h>
+#include <asm/cpufeatures.h>
#include "pervasive.h"
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 89098f320ad5..ca556b8be9ea 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -20,6 +20,7 @@ along with this file; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
#include <asm/cputable.h>
+#include <asm/cpufeatures.h>
#include "nonstdio.h"
#include "ansidecl.h"
#include "ppc.h"
--
2.1.0
The cpu features are fixed once the probe of cpu features are done.
And the function cpu_has_feature() does be used in some hot path.
The checking of the cpu features for each time of invoking of
cpu_has_feature() seems suboptimal. This tries to reduce this
overhead of this check by using jump label.
The generated assemble code of the following c program:
if (cpu_has_feature(CPU_FTR_XXX))
xxx()
Before:
lis r9,-16230
lwz r9,12324(r9)
lwz r9,12(r9)
andi. r10,r9,512
beqlr-
After:
nop if CPU_FTR_XXX is enabled
b xxx if CPU_FTR_XXX is not enabled
Signed-off-by: Kevin Hao <[email protected]>
---
arch/powerpc/include/asm/cpufeatures.h | 20 ++++++++++++++++++++
arch/powerpc/include/asm/cputable.h | 8 ++++++++
arch/powerpc/kernel/cputable.c | 18 ++++++++++++++++++
arch/powerpc/kernel/setup_32.c | 1 +
arch/powerpc/kernel/setup_64.c | 1 +
5 files changed, 48 insertions(+)
diff --git a/arch/powerpc/include/asm/cpufeatures.h b/arch/powerpc/include/asm/cpufeatures.h
index 37650db5044f..405a97fe6ef9 100644
--- a/arch/powerpc/include/asm/cpufeatures.h
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -3,6 +3,25 @@
#include <asm/cputable.h>
+#ifdef CONFIG_JUMP_LABEL
+#include <linux/jump_label.h>
+
+extern struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES];
+
+static inline int cpu_has_feature(unsigned long feature)
+{
+ int i;
+
+ if (CPU_FTRS_ALWAYS & feature)
+ return 1;
+
+ if (!(CPU_FTRS_POSSIBLE & feature))
+ return 0;
+
+ i = __builtin_ctzl(feature);
+ return static_branch_likely(&cpu_feat_keys[i]);
+}
+#else
static inline int cpu_has_feature(unsigned long feature)
{
return (CPU_FTRS_ALWAYS & feature) ||
@@ -10,5 +29,6 @@ static inline int cpu_has_feature(unsigned long feature)
& cur_cpu_spec->cpu_features
& feature);
}
+#endif
#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index ae4b6ef341cd..2ebee2894102 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -114,6 +114,12 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
extern const char *powerpc_base_platform;
+#ifdef CONFIG_JUMP_LABEL
+extern void cpu_feat_keys_init(void);
+#else
+static inline void cpu_feat_keys_init(void) { }
+#endif
+
/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
enum {
TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
@@ -124,6 +130,8 @@ enum {
/* CPU kernel features */
+#define MAX_CPU_FEATURES (8 * sizeof(((struct cpu_spec *)0)->cpu_features))
+
/* Retain the 32b definitions all use bottom half of word */
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x00000001)
#define CPU_FTR_L2CR ASM_CONST(0x00000002)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 7d80bfdfb15e..7d4fe69a61ed 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -15,6 +15,7 @@
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/jump_label.h>
#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
@@ -2195,3 +2196,20 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
return NULL;
}
+
+#ifdef CONFIG_JUMP_LABEL
+DEFINE_STATIC_KEY_TRUE_ARRAY(cpu_feat_keys, MAX_CPU_FEATURES);
+EXPORT_SYMBOL_GPL(cpu_feat_keys);
+
+void __init cpu_feat_keys_init(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_CPU_FEATURES; i++) {
+ unsigned long f = 1ul << i;
+
+ if (!(cur_cpu_spec->cpu_features & f))
+ static_branch_disable(&cpu_feat_keys[i]);
+ }
+}
+#endif
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index f0868f510b3b..93756175a13c 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -115,6 +115,7 @@ notrace void __init machine_init(u64 dt_ptr)
lockdep_init();
jump_label_init();
+ cpu_feat_keys_init();
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f0802a0b4a20..4cf3894d91fa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -251,6 +251,7 @@ void __init early_setup(unsigned long dt_ptr)
lockdep_init();
jump_label_init();
+ cpu_feat_keys_init();
/* -------- printk is now safe to use ------- */
--
2.1.0
The mmu features are fixed once the probe of mmu features are done.
And the function mmu_has_feature() does be used in some hot path.
The checking of the mmu features for each time of invoking of
mmu_has_feature() seems suboptimal. This tries to reduce this
overhead of this check by using jump label.
The generated assemble code of the following c program:
if (mmu_has_feature(MMU_FTR_XXX))
xxx()
Before:
lis r9,-16230
lwz r9,12324(r9)
lwz r9,24(r9)
andi. r10,r9,16
beqlr+
After:
nop if MMU_FTR_XXX is enabled
b xxx if MMU_FTR_XXX is not enabled
Signed-off-by: Kevin Hao <[email protected]>
---
arch/powerpc/include/asm/mmu.h | 29 +++++++++++++++++++++++++++++
arch/powerpc/kernel/cputable.c | 15 +++++++++++++++
arch/powerpc/kernel/setup_32.c | 1 +
arch/powerpc/kernel/setup_64.c | 1 +
4 files changed, 46 insertions(+)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3d5abfe6ba67..e091de352a75 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -109,6 +109,34 @@
DECLARE_PER_CPU(int, next_tlbcam_idx);
#endif
+#ifdef CONFIG_JUMP_LABEL
+#include <linux/jump_label.h>
+
+#define MAX_MMU_FEATURES (8 * sizeof(((struct cpu_spec *)0)->mmu_features))
+
+extern struct static_key_true mmu_feat_keys[MAX_MMU_FEATURES];
+
+extern void mmu_feat_keys_init(void);
+
+static inline int mmu_has_feature(unsigned long feature)
+{
+ int i;
+
+ i = __builtin_ctzl(feature);
+ return static_branch_likely(&mmu_feat_keys[i]);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+ int i;
+
+ i = __builtin_ctzl(feature);
+ cur_cpu_spec->mmu_features &= ~feature;
+ static_branch_disable(&mmu_feat_keys[i]);
+}
+#else
+static inline void mmu_feat_keys_init(void) { }
+
static inline int mmu_has_feature(unsigned long feature)
{
return (cur_cpu_spec->mmu_features & feature);
@@ -118,6 +146,7 @@ static inline void mmu_clear_feature(unsigned long feature)
{
cur_cpu_spec->mmu_features &= ~feature;
}
+#endif
extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 7d4fe69a61ed..18a843f139c3 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2212,4 +2212,19 @@ void __init cpu_feat_keys_init(void)
static_branch_disable(&cpu_feat_keys[i]);
}
}
+
+DEFINE_STATIC_KEY_TRUE_ARRAY(mmu_feat_keys, MAX_MMU_FEATURES);
+EXPORT_SYMBOL_GPL(mmu_feat_keys);
+
+void __init mmu_feat_keys_init(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_MMU_FEATURES; i++) {
+ unsigned long f = 1ul << i;
+
+ if (!(cur_cpu_spec->mmu_features & f))
+ static_branch_disable(&mmu_feat_keys[i]);
+ }
+}
#endif
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 93756175a13c..8acff5a4bc3e 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -116,6 +116,7 @@ notrace void __init machine_init(u64 dt_ptr)
jump_label_init();
cpu_feat_keys_init();
+ mmu_feat_keys_init();
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4cf3894d91fa..df6f98f1c46c 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -252,6 +252,7 @@ void __init early_setup(unsigned long dt_ptr)
jump_label_init();
cpu_feat_keys_init();
+ mmu_feat_keys_init();
/* -------- printk is now safe to use ------- */
--
2.1.0
On Thu, Aug 20, 2015 at 08:14:29PM +0800, Kevin Hao wrote:
> The jump_label_init() run in a very early stage, even before the
> sched_init(). So there is no chance for concurrent access of the
> jump label table.
It also doesn't hurt to have it. Its better to be consistent and
conservative with locking unless there is a pressing need.
On Thu, Aug 20, 2015 at 08:14:31PM +0800, Kevin Hao wrote:
> These are used to define a static_key_{true,false} array.
Yes but why...
there might have been some clue in the patches you didn't send me, but
since you didn't send them, I'm left wondering.
On Thu, Aug 20, 2015 at 08:29:03PM +0200, Peter Zijlstra wrote:
> On Thu, Aug 20, 2015 at 08:14:29PM +0800, Kevin Hao wrote:
> > The jump_label_init() run in a very early stage, even before the
> > sched_init(). So there is no chance for concurrent access of the
> > jump label table.
>
> It also doesn't hurt to have it. Its better to be consistent and
> conservative with locking unless there is a pressing need.
Yes, it has no real hurt. IMHO it may cause confusion that the function
jump_label_init() may run in two different thread context simultaneously.
Anyway if you guys don't think so, I can drop this patch.
Thanks,
Kevin
On Thu, Aug 20, 2015 at 08:31:58PM +0200, Peter Zijlstra wrote:
> On Thu, Aug 20, 2015 at 08:14:31PM +0800, Kevin Hao wrote:
> > These are used to define a static_key_{true,false} array.
>
> Yes but why...
>
> there might have been some clue in the patches you didn't send me, but
> since you didn't send them, I'm left wondering.
Sorry for the confusion. In order to use jump label for the
{cpu,mmu}_has_feature() functions on powerpc, we need to declare an array of
32 or 64 static_key_true (one static_key_true for each cpu or mmu feature).
The following are the two patches which depends on this patch.
https://lkml.org/lkml/2015/8/20/355
https://lkml.org/lkml/2015/8/20/356
So far only DEFINE_STATIC_KEY_TRUE_ARRAY macro is used, but I think it may seem
canonical to define the macros for both true or false keys at the same time.
Thanks,
Kevin
* Kevin Hao <[email protected]> wrote:
> These are used to define a static_key_{true,false} array.
>
> Signed-off-by: Kevin Hao <[email protected]>
> ---
> include/linux/jump_label.h | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
> index 7f653e8f6690..5c1d6a49dd6b 100644
> --- a/include/linux/jump_label.h
> +++ b/include/linux/jump_label.h
> @@ -267,6 +267,12 @@ struct static_key_false {
> #define DEFINE_STATIC_KEY_FALSE(name) \
> struct static_key_false name = STATIC_KEY_FALSE_INIT
>
> +#define DEFINE_STATIC_KEY_TRUE_ARRAY(name, n) \
> + struct static_key_true name[n] = { [0 ... n - 1] = STATIC_KEY_TRUE_INIT }
> +
> +#define DEFINE_STATIC_KEY_FALSE_ARRAY(name, n) \
> + struct static_key_false name[n] = { [0 ... n - 1] = STATIC_KEY_FALSE_INIT }
I think the define makes the code more obfuscated and less clear, the open-coded
initialization is pretty dense and easy to read to begin with.
Thanks,
Ingo
On Fri, Aug 21, 2015 at 08:28:26AM +0200, Ingo Molnar wrote:
>
> * Kevin Hao <[email protected]> wrote:
>
> > These are used to define a static_key_{true,false} array.
> >
> > Signed-off-by: Kevin Hao <[email protected]>
> > ---
> > include/linux/jump_label.h | 6 ++++++
> > 1 file changed, 6 insertions(+)
> >
> > diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
> > index 7f653e8f6690..5c1d6a49dd6b 100644
> > --- a/include/linux/jump_label.h
> > +++ b/include/linux/jump_label.h
> > @@ -267,6 +267,12 @@ struct static_key_false {
> > #define DEFINE_STATIC_KEY_FALSE(name) \
> > struct static_key_false name = STATIC_KEY_FALSE_INIT
> >
> > +#define DEFINE_STATIC_KEY_TRUE_ARRAY(name, n) \
> > + struct static_key_true name[n] = { [0 ... n - 1] = STATIC_KEY_TRUE_INIT }
> > +
> > +#define DEFINE_STATIC_KEY_FALSE_ARRAY(name, n) \
> > + struct static_key_false name[n] = { [0 ... n - 1] = STATIC_KEY_FALSE_INIT }
>
> I think the define makes the code more obfuscated and less clear, the open-coded
> initialization is pretty dense and easy to read to begin with.
OK, I will drop this patch and move the initialization of the array to the
corresponding patch.
Thanks,
Kevin
* Kevin Hao <[email protected]> wrote:
> On Fri, Aug 21, 2015 at 08:28:26AM +0200, Ingo Molnar wrote:
> >
> > * Kevin Hao <[email protected]> wrote:
> >
> > > These are used to define a static_key_{true,false} array.
> > >
> > > Signed-off-by: Kevin Hao <[email protected]>
> > > ---
> > > include/linux/jump_label.h | 6 ++++++
> > > 1 file changed, 6 insertions(+)
> > >
> > > diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
> > > index 7f653e8f6690..5c1d6a49dd6b 100644
> > > --- a/include/linux/jump_label.h
> > > +++ b/include/linux/jump_label.h
> > > @@ -267,6 +267,12 @@ struct static_key_false {
> > > #define DEFINE_STATIC_KEY_FALSE(name) \
> > > struct static_key_false name = STATIC_KEY_FALSE_INIT
> > >
> > > +#define DEFINE_STATIC_KEY_TRUE_ARRAY(name, n) \
> > > + struct static_key_true name[n] = { [0 ... n - 1] = STATIC_KEY_TRUE_INIT }
> > > +
> > > +#define DEFINE_STATIC_KEY_FALSE_ARRAY(name, n) \
> > > + struct static_key_false name[n] = { [0 ... n - 1] = STATIC_KEY_FALSE_INIT }
> >
> > I think the define makes the code more obfuscated and less clear, the open-coded
> > initialization is pretty dense and easy to read to begin with.
>
> OK, I will drop this patch and move the initialization of the array to the
> corresponding patch.
Please also Cc: peterz and me to the next submission of the series - static key
(and jump label) changes go through the locking tree normally, and there's a
number of changes pending already for v4.3:
20f9ed1568c0 locking/static_keys: Make verify_keys() static
412758cb2670 jump label, locking/static_keys: Update docs
2bf9e0ab08c6 locking/static_keys: Provide a selftest
ed79e946732e s390/uaccess, locking/static_keys: employ static_branch_likely()
3bbfafb77a06 x86, tsc, locking/static_keys: Employ static_branch_likely()
1987c947d905 locking/static_keys: Add selftest
11276d5306b8 locking/static_keys: Add a new static_key interface
706249c222f6 locking/static_keys: Rework update logic
e33886b38cc8 locking/static_keys: Add static_key_{en,dis}able() helpers
7dcfd915bae5 jump_label: Add jump_entry_key() helper
a1efb01feca5 jump_label, locking/static_keys: Rename JUMP_LABEL_TYPE_* and related helpers to the static_key* pattern
Thanks,
Ingo
On Fri, Aug 21, 2015 at 08:40:59AM +0200, Ingo Molnar wrote:
> Please also Cc: peterz and me to the next submission of the series - static key
> (and jump label) changes go through the locking tree normally, and there's a
> number of changes pending already for v4.3:
Sure.
Thanks,
Kevin