Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933944AbcDFJVT (ORCPT ); Wed, 6 Apr 2016 05:21:19 -0400 Received: from m50-138.163.com ([123.125.50.138]:50476 "EHLO m50-138.163.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932159AbcDFJVR (ORCPT ); Wed, 6 Apr 2016 05:21:17 -0400 From: zengzhaoxiu@163.com To: tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, dvlasenk@redhat.com, bp@suse.de, akpm@linux-foundation.org, dvyukov@google.com, keescook@chromium.org Cc: linux-kernel@vger.kernel.org, Zhaoxiu Zeng <zengzhaoxiu@163.com> Subject: [PATCH v2 10/30] Add x86-specific parity functions Date: Wed, 6 Apr 2016 17:14:45 +0800 Message-Id: <1459934085-7152-1-git-send-email-zengzhaoxiu@163.com> X-Mailer: git-send-email 2.5.0 In-Reply-To: <57031D9D.801@gmail.com> References: <57031D9D.801@gmail.com> X-CM-TRANSID: C9GowADnTrGG0wRX0aTbAA--.35310S2 X-Coremail-Antispam: 1Uf129KBjvJXoW3GF1UZFy7Jr4xAryktrW7XFb_yoW7CryUpF 97Crn5trWjg34SyFZIkF1xXa1SyFs7Gr1xtry3ury0vF17J348Arna9rWDArySganY9FWr Cr9xuFWUWF4DXaDanT9S1TB71UUUUUUqnTZGkaVYY2UrUUUUjbIjqfuFe4nvWSU5nxnvy2 9KBjDUYxBIdaVFxhVjvjDU0xZFpf9x07jKAp5UUUUU= X-Originating-IP: [112.95.225.98] X-CM-SenderInfo: p2hqw6xkdr5xrx6rljoofrz/1tbiowJDgFUL5eLQVgABs8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5316 Lines: 218 From: Zhaoxiu Zeng <zengzhaoxiu@163.com> Use alternatives, lifted from arch_hweight Signed-off-by: Zhaoxiu Zeng <zengzhaoxiu@163.com> --- arch/x86/include/asm/arch_hweight.h | 5 ++ arch/x86/include/asm/arch_parity.h | 102 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/bitops.h | 4 +- arch/x86/lib/Makefile | 8 +++ arch/x86/lib/parity.c | 32 ++++++++++++ 5 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/arch_parity.h create mode 100644 arch/x86/lib/parity.c diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 02e799f..c79d50d 100644 --- a/arch/x86/include/asm/arch_hweight.h 
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -63,4 +63,9 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
 }
 #endif /* CONFIG_X86_32 */
 
+#undef POPCNT32	/* arch_parity.h defines the same helper names */
+#undef POPCNT64
+#undef REG_IN
+#undef REG_OUT
+
 #endif
diff --git a/arch/x86/include/asm/arch_parity.h b/arch/x86/include/asm/arch_parity.h
new file mode 100644
index 0000000..09463fd
--- /dev/null
+++ b/arch/x86/include/asm/arch_parity.h
@@ -0,0 +1,102 @@
+#ifndef _ASM_X86_PARITY_H
+#define _ASM_X86_PARITY_H
+
+#include <asm/cpufeatures.h>	/* X86_FEATURE_POPCNT */
+
+#ifdef CONFIG_64BIT
+/* popcnt %edi, %eax -- redundant REX prefix for alignment */
+#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %rdi, %rax */
+#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
+#define REG_IN "D"	/* asm input constraint: %rdi/%edi, as hard-coded in the bytes above */
+#define REG_OUT "a"	/* asm output constraint: %rax/%eax */
+#else
+/* popcnt %eax, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
+#define REG_IN "a"	/* input and output share %eax on 32-bit */
+#define REG_OUT "a"
+#endif
+
+/*
+ * __sw_parityXX are called from within the alternatives below
+ * and callee-clobbered registers need to be taken care of. See
+ * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
+ * compiler switches.
+ */
+unsigned int __sw_parity32(unsigned int w);
+#ifndef CONFIG_X86_32
+unsigned int __sw_parity64(__u64 w);
+#endif
+/* Parity of bits 3..0 of w: 1 if an odd number of them are set, else 0. */
+static inline unsigned int __arch_parity4(unsigned int w)
+{
+	unsigned int res = 0;
+
+	asm("test $0xf, %1; setpo %b0"	/* PF <- parity of (w & 0xf); SETPO stores 1 if odd */
+	    : "+q" (res)		/* only the low byte is written, so res starts at 0 */
+	    : "r" (w)
+	    : "cc");
+
+	return res;
+}
+/* Parity of bits 7..0 of w: 1 if an odd number of them are set, else 0. */
+static inline unsigned int __arch_parity8(unsigned int w)
+{
+	unsigned int res = 0;
+
+	asm("test %1, %1; setpo %b0"	/* PF reflects only the low byte of the result */
+	    : "+q" (res)
+	    : "r" (w)
+	    : "cc");
+
+	return res;
+}
+/* Parity of bits 15..0 of w: 1 if an odd number of them are set, else 0. */
+static inline unsigned int __arch_parity16(unsigned int w)
+{
+	unsigned int res = 0;
+
+	asm("xor %h1, %b1; setpo %b0"	/* fold bits 15..8 into 7..0, then read PF */
+	    : "+q" (res), "+Q" (w)	/* "Q" = a/b/c/d: %h1 needs a register with a high-byte form */
+	    : : "cc");
+
+	return res;
+}
+/* Parity of all 32 bits of w: POPCNT if the CPU has it, else __sw_parity32(). */
+static __always_inline unsigned int __arch_parity32(unsigned int w)
+{
+	unsigned int res;
+
+	asm(ALTERNATIVE("call __sw_parity32", POPCNT32 "; and $1, %0", X86_FEATURE_POPCNT)
+	    : "="REG_OUT (res)		/* bit 0 of the population count is the parity */
+	    : REG_IN (w)
+	    : "cc");
+
+	return res;
+}
+/* Parity of all 64 bits of w: 1 if an odd number of them are set, else 0. */
+#ifdef CONFIG_X86_32
+static inline unsigned long __arch_parity64(__u64 w)
+{
+	return __arch_parity32((u32)w ^ (u32)(w >> 32));	/* XOR-folding the halves keeps the parity */
+}
+#else
+static __always_inline unsigned long __arch_parity64(__u64 w)
+{
+	unsigned long res;	/* NOTE(review): __sw_parity64() returns unsigned int; confirm %rax[63:32] is clear on the call path */
+
+	asm(ALTERNATIVE("call __sw_parity64", POPCNT64 "; and $1, %0", X86_FEATURE_POPCNT)
+	    : "="REG_OUT (res)
+	    : REG_IN (w)
+	    : "cc");
+
+	return res;
+}
+#endif /* CONFIG_X86_32 */
+
+#undef POPCNT32
+#undef POPCNT64
+#undef REG_IN
+#undef REG_OUT
+
+#endif
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 7766d1c..f5b0122 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -498,9 +498,11 @@ static __always_inline int fls64(__u64 x)
 #include
 
 #include
-
 #include
 
+#include
+#include
+
 #include
 
 #include
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a5767..5716295 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,6 +4,9 @@
 
 # Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
+# Kernel does not boot if we instrument this file as it uses custom calling
+# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
+KCOV_INSTRUMENT_parity.o := n
 
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
@@ -45,3 +48,8 @@ else
         lib-y += copy_user_64.o
         lib-y += cmpxchg16b_emu.o
 endif
+
+GCOV_PROFILE_parity.o := n
+CFLAGS_parity.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+obj-y += parity.o
+
diff --git a/arch/x86/lib/parity.c b/arch/x86/lib/parity.c
new file mode 100644
index 0000000..762117b
--- /dev/null
+++ b/arch/x86/lib/parity.c
@@ -0,0 +1,32 @@
+#include
+#include
+/* Software parity of a 32-bit word: returns 1 if w has an odd number of set bits, else 0. */
+unsigned int __sw_parity32(unsigned int w)
+{
+	unsigned int res;
+	w ^= w >> 16;			/* fold bits 31..16 into bits 15..0 */
+	asm("xor %%ah, %%al \n"		/* fold to one byte; PF now reflects its parity */
+	    "mov $0, %%eax \n"		/* clear the result; MOV leaves the flags intact */
+	    "setpo %%al \n"		/* al = 1 when the parity is odd */
+	    : "=a" (res)
+	    : "a" (w)
+	    : "cc");
+	return res;
+}
+EXPORT_SYMBOL(__sw_parity32);
+/* 64-bit variant, built only when CONFIG_X86_32 is not defined; same 0/1 result for all 64 bits of w. */
+#ifndef CONFIG_X86_32
+unsigned int __sw_parity64(__u64 w)
+{
+	unsigned int res = (unsigned int)w ^ (unsigned int)(w >> 32);	/* fold 64 -> 32 bits */
+	res ^= res >> 16;		/* fold 32 -> 16 bits */
+	asm("xor %%ah, %%al \n"		/* fold to one byte; PF now reflects its parity */
+	    "mov $0, %%eax \n"		/* clear the result; MOV leaves the flags intact */
+	    "setpo %%al \n"		/* al = 1 when the parity is odd */
+	    : "=a" (res)
+	    : "a" (res)
+	    : "cc");
+	return res;
+}
+EXPORT_SYMBOL(__sw_parity64);
+#endif
-- 
2.5.0