Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757033AbbGVRGt (ORCPT ); Wed, 22 Jul 2015 13:06:49 -0400 Received: from mail-qk0-f172.google.com ([209.85.220.172]:36090 "EHLO mail-qk0-f172.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756272AbbGVRGs (ORCPT ); Wed, 22 Jul 2015 13:06:48 -0400 Message-ID: <55AFCDA4.5010406@gmail.com> Date: Wed, 22 Jul 2015 13:06:44 -0400 From: Jason Baron User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.8.0 MIME-Version: 1.0 To: Borislav Petkov CC: Peter Zijlstra , Andy Lutomirski , Thomas Gleixner , Mikulas Patocka , Paul Mackerras , Arnaldo Carvalho de Melo , Kees Cook , Andrea Arcangeli , Vince Weaver , "hillf.zj" , Valdis Kletnieks , "linux-kernel@vger.kernel.org" , Steven Rostedt Subject: Re: Kernel broken on processors without performance counters References: <559D8250.8000707@gmail.com> <20150710141359.GJ19282@twins.programming.kicks-ass.net> <20150721082107.GE18673@twins.programming.kicks-ass.net> <20150721154959.GS19282@twins.programming.kicks-ass.net> <20150721161215.GU19282@twins.programming.kicks-ass.net> <20150721181553.GA3378@nazgul.tnic> <55AE9471.1000601@gmail.com> <20150722042403.GA6345@nazgul.tnic> In-Reply-To: <20150722042403.GA6345@nazgul.tnic> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14670 Lines: 426 On 07/22/2015 12:24 AM, Borislav Petkov wrote: > On Tue, Jul 21, 2015 at 02:50:25PM -0400, Jason Baron wrote: >> hmmm...so this is a case where need to the default the branch >> to the out-of-line branch at boot. That is, we can't just enable >> the out-of-line branch at boot time, b/c it might be too late at >> that point? IE native_sched_clock() gets called very early? > Well, even the layout is wrong here. 
The optimal thing would be to have: > > NOP > rdtsc > > unlikely: > /* read jiffies */ > > at build time. And then at boot time, patch in the JMP over the NOP on > !use_tsc boxes. And RDTSC works always, no matter how early. > > I'm fairly sure we can do that now with alternatives instead of jump > labels. > > The problem currently is that the 5-byte NOP gets patched in with a JMP > so we have an unconditional forwards JMP to the RDTSC. > > Now I'd put my money on most arches handling NOPs better then > unconditional JMPs and this is a hot path... > Ok, so we could add all 4 possible initial states, where the branches would be: static_likely_init_true_branch(struct static_likely_init_true_key *key) static_likely_init_false_branch(struct static_likely_init_false_key *key) static_unlikely_init_false_branch(struct static_unlikely_init_false_key *key) static_unlikely_init_true_branch(struct static_unlikely_init_true_key *key) So, this last one means 'inline' the false branch, but start the branch as true. Which is, I think, what you want here. So this addresses the use-case of the 'initial' branch direction not matching the default branch bias. We could also do this with link time patching (and potentially reduce the need for 4 different types), but the implementation below doesn't seem too bad :) It's based upon Peter's initial patch. It unfortunately does require some arch specific bits (so we probably need a 'HAVE_ARCH' wrapper until we have support). 
Now, the native_sched_clock() starts as: ffffffff8200bf10 : ffffffff8200bf10: 55 push %rbp ffffffff8200bf11: 48 89 e5 mov %rsp,%rbp ffffffff8200bf14: eb 30 jmp ffffffff8200bf46 ffffffff8200bf16: 0f 31 rdtsc And then the '0x3b' gets patched to a 2-byte no-op Thanks, -Jason diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a4c1cf7..030cf52 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -30,6 +30,20 @@ l_yes: return true; } +static __always_inline bool arch_static_branch_jump(struct static_key *key) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} + #ifdef CONFIG_X86_64 typedef u64 jump_label_t; #else diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 26d5a55..d40b19d 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -22,6 +22,10 @@ union jump_code_union { char jump; int offset; } __attribute__((packed)); + struct { + char jump_short; + char offset_short; + } __attribute__((packed)); }; static void bug_at(unsigned char *ip, int line) @@ -36,6 +40,8 @@ static void bug_at(unsigned char *ip, int line) BUG(); } +static const unsigned char short_nop[] = { P6_NOP2 }; + static void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, void *(*poker)(void *, const void *, size_t), @@ -44,47 +50,44 @@ static void __jump_label_transform(struct jump_entry *entry, union jump_code_union code; const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; + void *instruct = (void *)entry->code; + unsigned size = JUMP_LABEL_NOP_SIZE; + unsigned char opcode = *(unsigned char *)instruct; if (type == JUMP_LABEL_ENABLE) { - if (init) { - /* - * Jump label is enabled for the first time. 
- * So we expect a default_nop... - */ - if (unlikely(memcmp((void *)entry->code, default_nop, 5) - != 0)) - bug_at((void *)entry->code, __LINE__); - } else { - /* - * ...otherwise expect an ideal_nop. Otherwise - * something went horribly wrong. - */ - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) - != 0)) - bug_at((void *)entry->code, __LINE__); - } + if (opcode == 0xe9 || opcode == 0xeb) + return; - code.jump = 0xe9; - code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); - } else { - /* - * We are disabling this jump label. If it is not what - * we think it is, then something must have gone wrong. - * If this is the first initialization call, then we - * are converting the default nop to the ideal nop. - */ - if (init) { - if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0)) + if (memcmp(instruct, short_nop, 2) == 0) + size = 2; + else if (!(memcmp(instruct, ideal_nop, 5) == 0) && + !(memcmp(instruct, default_nop, 5) == 0)) bug_at((void *)entry->code, __LINE__); + + if (size != JUMP_LABEL_NOP_SIZE) { + code.jump_short = 0xeb; + code.offset = entry->target - (entry->code + size); } else { code.jump = 0xe9; code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); - if (unlikely(memcmp((void *)entry->code, &code, 5) != 0)) - bug_at((void *)entry->code, __LINE__); + (entry->code + size); } - memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); + } else { + if (memcmp(instruct, short_nop, 2) == 0) + return; + + if (memcmp(instruct, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE) == 0) + return; + + if (opcode == 0xeb) + size = 2; + else if ((opcode != 0xe9) && !(memcmp(instruct, default_nop, JUMP_LABEL_NOP_SIZE) == 0)) + bug_at((void *)entry->code, __LINE__); + + if (size != JUMP_LABEL_NOP_SIZE) + memcpy(&code, short_nop, size); + else + memcpy(&code, ideal_nops[NOP_ATOMIC5], size); } /* @@ -96,10 +99,10 @@ static void __jump_label_transform(struct jump_entry *entry, * */ if (poker) - (*poker)((void *)entry->code, 
&code, JUMP_LABEL_NOP_SIZE); + (*poker)((void *)entry->code, &code, size); else - text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE, - (void *)entry->code + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)entry->code, &code, size, + (void *)entry->code + size); } void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 5054497..192c92f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -38,7 +38,7 @@ static int __read_mostly tsc_unstable; erroneous rdtsc usage on !cpu_has_tsc processors */ static int __read_mostly tsc_disabled = -1; -static struct static_key __use_tsc = STATIC_KEY_INIT; +static struct static_unlikely_init_true_key __use_tsc = STATIC_KEY_UNLIKELY_INIT_TRUE; int tsc_clocksource_reliable; @@ -284,7 +284,7 @@ u64 native_sched_clock(void) * very important for it to be as fast as the platform * can achieve it. ) */ - if (!static_key_false(&__use_tsc)) { + if (static_unlikely_init_true_branch(&__use_tsc)) { /* No locking but a rare wrong value is not a big deal: */ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); } @@ -1201,7 +1201,7 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; - static_key_slow_inc(&__use_tsc); + static_branch_dec(&__use_tsc); if (!no_sched_irq_time) enable_sched_clock_irqtime(); diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f4de473..558fef0 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -130,6 +130,16 @@ static __always_inline bool static_key_true(struct static_key *key) return !static_key_false(key); } +static __always_inline bool static_key_false_jump(struct static_key *key) +{ + return arch_static_branch_jump(key); +} + +static __always_inline bool static_key_true_jump(struct static_key *key) +{ + return !static_key_false_jump(key); +} + extern struct jump_entry __start___jump_table[]; extern struct jump_entry 
__stop___jump_table[]; @@ -152,6 +162,20 @@ extern void jump_label_apply_nops(struct module *mod); { .enabled = ATOMIC_INIT(0), \ .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH }) +#define STATIC_KEY_LIKEY_INIT_TRUE ((struct static_unlikely_init_true_key) \ + { .key.enabled = ATOMIC_INIT(1), \ + .key.entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH }) +#define STATIC_KEY_LIKEY_INIT_FALSE ((struct static_unlikely_init_false_key) \ + { .key.enabled = ATOMIC_INIT(0), \ + .key.entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH }) + +#define STATIC_KEY_UNLIKELY_INIT_FALSE ((struct static_unlikely_init_false_key) \ + { .key.enabled = ATOMIC_INIT(0), \ + .key.entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH }) +#define STATIC_KEY_UNLIKELY_INIT_TRUE ((struct static_unlikely_init_true_key) \ + { .key.enabled = ATOMIC_INIT(1), \ + .key.entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH }) + #else /* !HAVE_JUMP_LABEL */ static __always_inline void jump_label_init(void) @@ -165,6 +189,7 @@ static __always_inline bool static_key_false(struct static_key *key) return true; return false; } +#define static_key_false_jump static_key_false static __always_inline bool static_key_true(struct static_key *key) { @@ -172,6 +197,7 @@ static __always_inline bool static_key_true(struct static_key *key) return true; return false; } +#define static_key_true_jump static_key_true static inline void static_key_slow_inc(struct static_key *key) { @@ -203,6 +229,15 @@ static inline int jump_label_apply_nops(struct module *mod) #define STATIC_KEY_INIT_FALSE ((struct static_key) \ { .enabled = ATOMIC_INIT(0) }) +#define STATIC_KEY_LIKEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1) }) +#define STATIC_KEY_LIKEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0) }) +#define STATIC_KEY_UNLIKELY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0) }) +#define STATIC_KEY_UNLIKELY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1) }) + #endif /* HAVE_JUMP_LABEL */ 
#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE @@ -213,6 +248,85 @@ static inline bool static_key_enabled(struct static_key *key) return static_key_count(key) > 0; } -#endif /* _LINUX_JUMP_LABEL_H */ +static inline void static_key_enable(struct static_key *key) +{ + int count = static_key_count(key); + + WARN_ON_ONCE(count < 0 || count > 1); + + if (!count) + static_key_slow_inc(key); +} + +static inline void static_key_disable(struct static_key *key) +{ + int count = static_key_count(key); + + WARN_ON_ONCE(count < 0 || count > 1); + + if (count) + static_key_slow_dec(key); +} + +/* -------------------------------------------------------------------------- */ + +/* + * likely -- default enabled, puts the branch body in-line + */ + +struct static_likely_init_true_key { + struct static_key key; +}; + +struct static_likely_init_false_key { + struct static_key key; +}; + +static inline bool static_likely_init_true_branch(struct static_likely_init_true_key *key) +{ + return static_key_true(&key->key); +} + +static inline bool static_likely_init_false_branch(struct static_likely_init_false_key *key) +{ + return static_key_true_jump(&key->key); +} + +/* + * unlikely -- default disabled, puts the branch body out-of-line + */ + +struct static_unlikely_init_false_key { + struct static_key key; +}; + +struct static_unlikely_init_true_key { + struct static_key key; +}; + +static inline bool static_unlikely_init_false_branch(struct static_unlikely_init_false_key *key) +{ + return static_key_false(&key->key); +} + +static inline bool static_unlikely_init_true_branch(struct static_unlikely_init_true_key *key) +{ + return static_key_false_jump(&key->key); +} + +/* + * Advanced usage; refcount, branch is enabled when: count != 0 + */ + +#define static_branch_inc(_k) static_key_slow_inc(&(_k)->key) +#define static_branch_dec(_k) static_key_slow_dec(&(_k)->key) + +/* + * Normal usage; boolean enable/disable. 
+ */ + +#define static_branch_enable(_k) static_key_enable(&(_k)->key) +#define static_branch_disable(_k) static_key_disable(&(_k)->key) #endif /* __ASSEMBLY__ */ +#endif /* _LINUX_JUMP_LABEL_H */ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 9019f15..47a6478 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -203,7 +203,8 @@ void __init jump_label_init(void) struct static_key *iterk; iterk = (struct static_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_type(iterk)); + if (jump_label_type(iterk) == JUMP_LABEL_DISABLE) + arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); if (iterk == key) continue; @@ -318,8 +319,7 @@ static int jump_label_add_module(struct module *mod) jlm->next = key->next; key->next = jlm; - if (jump_label_type(key) == JUMP_LABEL_ENABLE) - __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); + __jump_label_update(key, iter, iter_stop, jump_label_type(key)); } return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/