Date: Tue, 2 Oct 2018 12:50:16 +0100 (BST)
From: "Maciej W. Rozycki"
To: Ralf Baechle, Paul Burton
cc: linux-mips@linux-mips.org, linux-kernel@vger.kernel.org
Subject: [PATCH 2/2] MIPS: memset: Limit excessive `noreorder' assembly mode use
In-Reply-To:
Message-ID:
References:
User-Agent: Alpine 2.21 (LFD 202 2017-01-01)
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII

Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction due
to a data dependency or a section switch (as is the case with the EX
macro).  No change in machine code produced.

Signed-off-by: Maciej W. Rozycki
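
For reference, a minimal made-up fragment (not taken from memset.S; the
registers, labels and constants are arbitrary) of what the rewrite relies
on: under `reorder' GAS fills a branch delay slot itself whenever it can
safely move the preceding instruction, whereas a data dependency on the
branch condition still needs a hand-scheduled slot in a `noreorder'
region:

	# Case 1: no dependency -- under `reorder' GAS is free to move the
	# preceding ADDIU into the BNEZ delay slot, giving the same machine
	# code as scheduling the slot by hand.
		.set	reorder
		addiu	a0, a0, 4	# does not touch t0
		bnez	t0, 1f
	1:

	# Case 2: the slot instruction clobbers the register the branch
	# tests, so it cannot simply be written ahead of the branch; the
	# slot stays hand-scheduled inside a small `noreorder' region.
		.set	noreorder
		bnez	t0, 2f		# tests the old value of t0
		 andi	t0, a0, 0x7	# executes in the delay slot either way
		.set	reorder
	2:

The EX() stores are the other case the description mentions: the macro's
exception-table section switch keeps GAS from filling the slot, so those
branches likewise keep their hand-scheduled slots under `noreorder'.
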
---
 arch/mips/lib/memset.S | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

linux-mips-memset-reorder.patch
Index: linux-20180930-3maxp-defconfig/arch/mips/lib/memset.S
===================================================================
--- linux-20180930-3maxp-defconfig.orig/arch/mips/lib/memset.S
+++ linux-20180930-3maxp-defconfig/arch/mips/lib/memset.S
@@ -78,7 +78,6 @@
 #endif
 	.endm

-	.set	noreorder
 	.align	5

 /*
@@ -94,13 +93,16 @@
 	.endif

 	sltiu		t0, a2, STORSIZE	/* very small region? */
+	.set		noreorder
 	bnez		t0, .Lsmall_memset\@
 	 andi		t0, a0, STORMASK	/* aligned? */
+	.set		reorder

 #ifdef CONFIG_CPU_MICROMIPS
 	move		t8, a1			/* used by 'swp' instruction */
 	move		t9, a1
 #endif
+	.set		noreorder
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
 	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
@@ -111,6 +113,7 @@
 	 PTR_SUBU	t0, AT			/* alignment in bytes */
 	.set		at
 #endif
+	.set		reorder

 #ifndef CONFIG_CPU_MIPSR6
 	R10KCBARRIER(0(ra))
@@ -125,8 +128,10 @@
 #else /* CONFIG_CPU_MIPSR6 */
 #define STORE_BYTE(N)				\
 	EX(sb, a1, N(a0), .Lbyte_fixup\@);	\
+	.set	noreorder;			\
 	beqz	t0, 0f;				\
-	PTR_ADDU t0, 1;
+	PTR_ADDU t0, 1;				\
+	.set	reorder;

 	PTR_ADDU	a2, t0			/* correct size */
 	PTR_ADDU	t0, 1
@@ -148,16 +153,14 @@
 #endif /* CONFIG_CPU_MIPSR6 */
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
+	andi		t0, a2, 0x40-STORSIZE
 	beqz		t1, .Lmemset_partial\@	/* no block to fill */
-	 andi		t0, a2, 0x40-STORSIZE

 	PTR_ADDU	t1, a0			/* end address */
-	.set		reorder
 1:	PTR_ADDIU	a0, 64
 	R10KCBARRIER(0(ra))
 	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
 	bne		t1, a0, 1b
-	.set		noreorder

 .Lmemset_partial\@:
 	R10KCBARRIER(0(ra))
@@ -173,20 +176,18 @@
 	PTR_SUBU	t1, AT
 	.set		at
 #endif
+	PTR_ADDU	a0, t0			/* dest ptr */
 	jr		t1
-	 PTR_ADDU	a0, t0			/* dest ptr */

-	.set		push
-	.set		noreorder
-	.set		nomacro
 	/* ... but first do longs ... */
 	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
-2:	.set		pop
-	andi		a2, STORMASK		/* At most one long to go */
+2:	andi		a2, STORMASK		/* At most one long to go */

+	.set		noreorder
 	beqz		a2, 1f
 #ifndef CONFIG_CPU_MIPSR6
 	 PTR_ADDU	a0, a2			/* What's left */
+	.set		reorder
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -195,6 +196,7 @@
 #endif
 #else
 	PTR_SUBU	t0, $0, a2
+	.set		reorder
 	move		a2, zero		/* No remaining longs */
 	PTR_ADDIU	t0, 1
 	STORE_BYTE(0)
@@ -210,20 +212,22 @@
 #endif
 0:
 #endif
-1:	jr		ra
-	 move		a2, zero
+1:	move		a2, zero
+	jr		ra

 .Lsmall_memset\@:
+	PTR_ADDU	t1, a0, a2
 	beqz		a2, 2f
-	 PTR_ADDU	t1, a0, a2

 1:	PTR_ADDIU	a0, 1			/* fill bytewise */
 	R10KCBARRIER(0(ra))
+	.set		noreorder
 	bne		t1, a0, 1b
 	 EX(sb, a1, -1(a0), .Lsmall_fixup\@)
+	.set		reorder

-2:	jr		ra			/* done */
-	 move		a2, zero
+2:	move		a2, zero
+	jr		ra			/* done */
 	.if __memset == 1
 	END(memset)
 	.set __memset, 0
@@ -237,14 +241,13 @@
 	 *      a2     = a2 - t0 + 1
 	 */
 	PTR_SUBU	a2, t0
+	PTR_ADDIU	a2, 1
 	jr		ra
-	 PTR_ADDIU	a2, 1
 #endif /* CONFIG_CPU_MIPSR6 */

 .Lfirst_fixup\@:
 	/* unset_bytes already in a2 */
 	jr	ra
-	 nop

 .Lfwd_fixup\@:
 	/*
@@ -255,8 +258,8 @@
 	andi		a2, 0x3f
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
+	LONG_SUBU	a2, t0
 	jr		ra
-	 LONG_SUBU	a2, t0

 .Lpartial_fixup\@:
 	/*
@@ -267,24 +270,21 @@
 	andi		a2, STORMASK
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, a0
+	LONG_SUBU	a2, t0
 	jr		ra
-	 LONG_SUBU	a2, t0

 .Llast_fixup\@:
 	/* unset_bytes already in a2 */
 	jr		ra
-	 nop

 .Lsmall_fixup\@:
 	/*
 	 * unset_bytes = end_addr - current_addr + 1
 	 *      a2     =    t1    -     a0       + 1
 	 */
-	.set		reorder
 	PTR_SUBU	a2, t1, a0
 	PTR_ADDIU	a2, 1
 	jr		ra
-	.set		noreorder

 	.endm

@@ -298,8 +298,8 @@

 LEAF(memset)
 EXPORT_SYMBOL(memset)
+	move		v0, a0			/* result */
 	beqz		a1, 1f
-	 move		v0, a0			/* result */

 	andi		a1, 0xff		/* spread fillword */
 	LONG_SLL	t1, a1, 8