Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752596AbdLGWm7 (ORCPT ); Thu, 7 Dec 2017 17:42:59 -0500 Received: from mail-wr0-f195.google.com ([209.85.128.195]:35096 "EHLO mail-wr0-f195.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751059AbdLGWmG (ORCPT ); Thu, 7 Dec 2017 17:42:06 -0500 X-Google-Smtp-Source: AGs4zMafrqipsPgFVZ6v9MF1nBph84UZB745opPfGCLmDMd5p1+qQDAY27ioy1fT1dFVlMVyvSv7aA== From: Alexey Dobriyan To: linux-kernel@vger.kernel.org Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, Alexey Dobriyan Subject: [PATCH 3/5] -march=native: REP MOVSB support Date: Fri, 8 Dec 2017 01:41:52 +0300 Message-Id: <20171207224154.4687-3-adobriyan@gmail.com> X-Mailer: git-send-email 2.13.6 In-Reply-To: <20171207224154.4687-1-adobriyan@gmail.com> References: <20171207224154.4687-1-adobriyan@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5337 Lines: 209 If the CPU advertises fast REP MOVSB (ERMS), use it. Inline copy_page() so that only 3 registers are used across the function call, not the whole set required by the ABI. Also, tell gcc to use REP MOVSB for memcpy(); this saves terabytes of .text. 
Signed-off-by: Alexey Dobriyan --- Makefile | 3 +++ arch/x86/include/asm/page_64.h | 13 +++++++++++++ arch/x86/kernel/relocate_kernel_64.S | 15 +++++++++++++++ arch/x86/lib/Makefile | 5 ++++- arch/x86/lib/memcpy_64.S | 13 +++++++++++++ arch/x86/xen/xen-pvh.S | 4 ++++ scripts/kconfig/cpuid.c | 9 +++++++++ scripts/march-native.sh | 1 + 8 files changed, 62 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c1cc730b81a8..84abac4c181a 100644 --- a/Makefile +++ b/Makefile @@ -590,6 +590,9 @@ ifeq ($(dot-config),1) ifdef CONFIG_MARCH_NATIVE KBUILD_CFLAGS += -march=native endif +ifdef CONFIG_MARCH_NATIVE_REP_MOVSB +KBUILD_CFLAGS += -mmemcpy-strategy=rep_byte:-1:align +endif ifeq ($(KBUILD_EXTMOD),) # Read in dependencies to all Kconfig* files, make sure to run diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bceb232..c2353661eaf1 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -50,7 +50,20 @@ static inline void clear_page(void *page) : "memory", "rax", "rcx"); } +#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB +static __always_inline void copy_page(void *to, void *from) +{ + uint32_t len = PAGE_SIZE; + asm volatile ( + "rep movsb" + : "+D" (to), "+S" (from), "+c" (len) + : + : "memory" + ); +} +#else void copy_page(void *to, void *from); +#endif #ifdef CONFIG_X86_MCE #define arch_unmap_kpfn arch_unmap_kpfn diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 307d3bac5f04..6ccfb9a63d5c 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -260,18 +260,33 @@ swap_pages: movq %rsi, %rax movq %r10, %rdi +#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB + mov $4096, %ecx + rep movsb +#else movl $512, %ecx rep ; movsq +#endif movq %rax, %rdi movq %rdx, %rsi +#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB + mov $4096, %ecx + rep movsb +#else movl $512, %ecx rep ; movsq +#endif movq %rdx, %rdi movq %r10, %rsi +#ifdef 
CONFIG_MARCH_NATIVE_REP_MOVSB + mov $4096, %ecx + rep movsb +#else movl $512, %ecx rep ; movsq +#endif lea PAGE_SIZE(%rax), %rsi jmp 0b diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index c26ad76e7048..8f9460bef2ec 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -45,7 +45,10 @@ endif else obj-y += iomap_copy_64.o lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o - lib-y += clear_page_64.o copy_page_64.o + lib-y += clear_page_64.o +ifneq ($(CONFIG_MARCH_NATIVE_REP_MOVSB),y) + lib-y += copy_page_64.o +endif lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o lib-y += cmpxchg16b_emu.o diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..28a807eb9bd6 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -15,6 +15,18 @@ .weak memcpy +#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB +ENTRY(__memcpy) +ENTRY(memcpy) + mov %rdi, %rax + mov %rdx, %rcx + rep movsb + ret +ENDPROC(memcpy) +ENDPROC(__memcpy) +EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(__memcpy) +#else /* * memcpy - Copy a memory block. 
* @@ -181,6 +193,7 @@ ENTRY(memcpy_orig) .Lend: retq ENDPROC(memcpy_orig) +#endif #ifndef CONFIG_UML /* diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index e1a5fbeae08d..1dc30f0dfdf7 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -68,9 +68,13 @@ ENTRY(pvh_start_xen) mov $_pa(pvh_start_info), %edi mov %ebx, %esi mov _pa(pvh_start_info_sz), %ecx +#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB + rep movsb +#else shr $2,%ecx rep movsl +#endif mov $_pa(early_stack_end), %esp diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c index e565dd446bdf..4947f47e7728 100644 --- a/scripts/kconfig/cpuid.c +++ b/scripts/kconfig/cpuid.c @@ -43,6 +43,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t } static bool popcnt = false; +static bool rep_movsb = false; static uint32_t eax0_max; @@ -57,6 +58,13 @@ static void intel(void) if (ecx & (1 << 23)) popcnt = true; } + if (eax0_max >= 7) { + cpuid2(7, 0, &eax, &ecx, &edx, &ebx); +// printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx); + + if (ebx & (1 << 9)) + rep_movsb = true; + } } int main(int argc, char *argv[]) @@ -76,6 +84,7 @@ int main(int argc, char *argv[]) #define _(x) if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE _(popcnt); + _(rep_movsb); #undef _ return EXIT_FAILURE; diff --git a/scripts/march-native.sh b/scripts/march-native.sh index 6641e356b646..eb52c20c56b4 100755 --- a/scripts/march-native.sh +++ b/scripts/march-native.sh @@ -31,6 +31,7 @@ option() { if test -x "$CPUID"; then "$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT" + "$CPUID" rep_movsb && option "CONFIG_MARCH_NATIVE_REP_MOVSB" fi if test ! -f "$AUTOCONF1" -o ! -f "$AUTOCONF2"; then -- 2.13.6