Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752433AbdLGWmL (ORCPT ); Thu, 7 Dec 2017 17:42:11 -0500 Received: from mail-wr0-f193.google.com ([209.85.128.193]:43412 "EHLO mail-wr0-f193.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752321AbdLGWmI (ORCPT ); Thu, 7 Dec 2017 17:42:08 -0500 X-Google-Smtp-Source: AGs4zMY1fhoQBY/dVfKxXjYSZOMZ8A2sitBIH7QkWH4/loCdamIHzMdGoTeeLDAOUFnTxOvcMVdEEg== From: Alexey Dobriyan To: linux-kernel@vger.kernel.org Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, Alexey Dobriyan Subject: [PATCH 5/5] -march=native: MOVBE support Date: Fri, 8 Dec 2017 01:41:54 +0300 Message-Id: <20171207224154.4687-5-adobriyan@gmail.com> X-Mailer: git-send-email 2.13.6 In-Reply-To: <20171207224154.4687-1-adobriyan@gmail.com> References: <20171207224154.4687-1-adobriyan@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4852 Lines: 182 Use MOVBE if it is available. This doesn't save code size as MOVBE seems to be as long as MOV+BSWAP. It is not clear whether it saves a uop; maybe it will in the future. Do it because it is easy, I guess. 
--- arch/x86/crypto/des3_ede-asm_64.S | 28 ++++++++++++++++++++++++++++ arch/x86/net/bpf_jit.S | 12 ++++++++++++ scripts/kconfig/cpuid.c | 4 ++++ scripts/march-native.sh | 3 ++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 8e49ce117494..007319ea1f62 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -159,6 +159,15 @@ #define dummy2(a, b) /*_*/ +#ifdef CONFIG_MARCH_NATIVE_MOVBE +#define read_block(io, left, right) \ + movbe (io), left##d; \ + movbe 4(io), right##d; + +#define write_block(io, left, right) \ + movbe left##d, (io); \ + movbe right##d, 4(io); +#else #define read_block(io, left, right) \ movl (io), left##d; \ movl 4(io), right##d; \ @@ -170,6 +179,7 @@ bswapl right##d; \ movl left##d, (io); \ movl right##d, 4(io); +#endif ENTRY(des3_ede_x86_64_crypt_blk) /* input: @@ -443,6 +453,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) pushq %rsi /* dst */ /* load input */ +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe 0 * 4(%rdx), RL0d; + movbe 1 * 4(%rdx), RR0d; + movbe 2 * 4(%rdx), RL1d; + movbe 3 * 4(%rdx), RR1d; + movbe 4 * 4(%rdx), RL2d; + movbe 5 * 4(%rdx), RR2d; +#else movl 0 * 4(%rdx), RL0d; movl 1 * 4(%rdx), RR0d; movl 2 * 4(%rdx), RL1d; @@ -456,6 +474,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) bswapl RR1d; bswapl RL2d; bswapl RR2d; +#endif initial_permutation3(RL, RR); @@ -516,6 +535,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) final_permutation3(RR, RL); +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe RR0d, 0 * 4(%rsi); + movbe RL0d, 1 * 4(%rsi); + movbe RR1d, 2 * 4(%rsi); + movbe RL1d, 3 * 4(%rsi); + movbe RR2d, 4 * 4(%rsi); + movbe RL2d, 5 * 4(%rsi); +#else bswapl RR0d; bswapl RL0d; bswapl RR1d; @@ -530,6 +557,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) movl RL1d, 3 * 4(%rsi); movl RR2d, 4 * 4(%rsi); movl RL2d, 5 * 4(%rsi); +#endif popq %r15; popq %r14; diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 
b33093f84528..17fe33750298 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -34,8 +34,12 @@ FUNC(sk_load_word_positive_offset) sub %esi,%eax # hlen - offset cmp $3,%eax jle bpf_slow_path_word +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe (SKBDATA,%rsi),%eax +#else mov (SKBDATA,%rsi),%eax bswap %eax /* ntohl() */ +#endif ret FUNC(sk_load_half) @@ -80,8 +84,12 @@ FUNC(sk_load_byte_positive_offset) bpf_slow_path_word: bpf_slow_path_common(4) js bpf_error +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe 32(%rbp),%eax +#else mov 32(%rbp),%eax bswap %eax +#endif ret bpf_slow_path_half: @@ -118,8 +126,12 @@ bpf_slow_path_word_neg: FUNC(sk_load_word_negative_offset) sk_negative_common(4) +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe (%rax), %eax +#else mov (%rax), %eax bswap %eax +#endif ret bpf_slow_path_half_neg: diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c index ecb285183581..2c23c8699ae6 100644 --- a/scripts/kconfig/cpuid.c +++ b/scripts/kconfig/cpuid.c @@ -42,6 +42,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t ); } +static bool movbe = false; static bool popcnt = false; static bool rep_movsb = false; static bool rep_stosb = false; @@ -56,6 +57,8 @@ static void intel(void) cpuid(1, &eax, &ecx, &edx, &ebx); // printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx); + if (ecx & (1 << 22)) + movbe = true; if (ecx & (1 << 23)) popcnt = true; } @@ -86,6 +89,7 @@ int main(int argc, char *argv[]) intel(); #define _(x) if (streq(opt, #x)) return x ? 
EXIT_SUCCESS : EXIT_FAILURE + _(movbe); _(popcnt); _(rep_movsb); _(rep_stosb); diff --git a/scripts/march-native.sh b/scripts/march-native.sh index d3adf0edb2be..93f6a9bd4a6c 100755 --- a/scripts/march-native.sh +++ b/scripts/march-native.sh @@ -30,6 +30,7 @@ option() { } if test -x "$CPUID"; then + "$CPUID" movbe && option "CONFIG_MARCH_NATIVE_MOVBE" "$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT" "$CPUID" rep_movsb && option "CONFIG_MARCH_NATIVE_REP_MOVSB" "$CPUID" rep_stosb && option "CONFIG_MARCH_NATIVE_REP_STOSB" @@ -76,7 +77,7 @@ for i in $COLLECT_GCC_OPTIONS; do -mhle) option "CONFIG_MARCH_NATIVE_HLE" ;; -mlzcnt) option "CONFIG_MARCH_NATIVE_LZCNT" ;; -mmmx) option "CONFIG_MARCH_NATIVE_MMX" ;; - -mmovbe) option "CONFIG_MARCH_NATIVE_MOVBE" ;; + -mmovbe);; -mpclmul) option "CONFIG_MARCH_NATIVE_PCLMUL" ;; -mpopcnt);; -mprfchw) option "CONFIG_MARCH_NATIVE_PREFETCHW" ;; -- 2.13.6