2022-10-28 17:19:52

by Russell King (Oracle)

[permalink] [raw]
Subject: [PATCH 0/5] ARM: findbit assembly updates

Hi,

This series updates the arm32 assembly versions of the findbit
operations:

- Document ARMv5 code that calculates the bit offset
- Provide an updated ARMv7 implementation using the rbit instruction
- Switch to use macros instead of duplicating mostly identical code
- Switch to using word loads rather than byte loads
- Add unwinder information for backtracing

I've had this series sitting around in use for some time, and no issues
have arisen. It has also been tested outside the kernel tree in userspace,
and the results are the same as with the previous implementation.

Testing with the find_bit benchmark module shows that these operations
coded in assembly are faster than the generic versions (previously
posted), so I believe they're worth keeping.

arch/arm/include/asm/assembler.h | 6 +
arch/arm/lib/findbit.S | 230 +++++++++++++++------------------------
2 files changed, 94 insertions(+), 142 deletions(-)

--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!


2022-10-28 17:24:54

by Russell King (Oracle)

[permalink] [raw]
Subject: [PATCH 2/5] ARM: findbit: provide more efficient ARMv7 implementation

Provide a more efficient ARMv7 implementation to determine the first
set bit in the supplied value: reverse the bits with rbit, then count
the leading zeros with clz.

Signed-off-by: Russell King (Oracle) <[email protected]>
---
arch/arm/lib/findbit.S | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 4c584bc4704b..256e095d490b 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -170,7 +170,11 @@ ENDPROC(_find_next_bit_be)
* One or more bits in the LSB of r3 are assumed to be set.
*/
.L_found:
-#if __LINUX_ARM_ARCH__ >= 5
+#if __LINUX_ARM_ARCH__ >= 7
+ rbit r3, r3 @ reverse bits
+ clz r3, r3 @ count high zero bits
+ add r0, r2, r3 @ add offset of first set bit
+#elif __LINUX_ARM_ARCH__ >= 5
rsb r0, r3, #0
and r3, r3, r0 @ mask out lowest bit set
clz r3, r3 @ count high zero bits
--
2.30.2


2022-10-28 17:26:12

by Russell King (Oracle)

[permalink] [raw]
Subject: [PATCH 4/5] ARM: findbit: operate by words

Convert the implementations to operate on words rather than bytes,
which makes bitmap searching faster.

Signed-off-by: Russell King (Oracle) <[email protected]>
---
arch/arm/include/asm/assembler.h | 6 +++
arch/arm/lib/findbit.S | 78 ++++++++++++++++++--------------
2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 90fbe4a3f9c8..28e18f79c300 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -761,6 +761,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.endif
.endm

+ .if __LINUX_ARM_ARCH__ < 6
+ .set .Lrev_l_uses_tmp, 1
+ .else
+ .set .Lrev_l_uses_tmp, 0
+ .endif
+
/*
* bl_r - branch and link to register
*
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8280f66d38a5..6ec584d16d46 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -14,32 +14,32 @@
#include <asm/assembler.h>
.text

+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
+
.macro find_first, endian, set, name
ENTRY(_find_first_\name\()bit_\endian)
teq r1, #0
beq 3f
mov r2, #0
-1:
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
+1: ldr r3, [r0], #4
+ .ifeq \set
+ mvns r3, r3 @ invert/test bits
.else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3 @ test bits
.endif
- .ifeq \set
- eors r3, r3, #0xff @ invert bits
+ .ifc \endian, SWAB_ENDIAN
+ bne .L_found_swab
.else
- movs r3, r3
+ bne .L_found @ found the bit?
.endif
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
+ add r2, r2, #32 @ next index
2: cmp r2, r1 @ any more?
blo 1b
-3: mov r0, r1 @ no free bits
+3: mov r0, r1 @ no more bits
ret lr
ENDPROC(_find_first_\name\()bit_\endian)
.endm
@@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian)
ENTRY(_find_next_\name\()bit_\endian)
cmp r2, r1
bhs 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- .else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ mov ip, r2, lsr #5 @ word index
+ add r0, r0, ip, lsl #2
+ ands ip, r2, #31 @ bit position
+ beq 1b
+ ldr r3, [r0], #4
+ .ifeq \set
+ mvn r3, r3 @ invert bits
+ .endif
+ .ifc \endian, SWAB_ENDIAN
+ rev_l r3, ip
+ .if .Lrev_l_uses_tmp
+ @ we need to recompute ip because rev_l will have overwritten
+ @ it.
+ and ip, r2, #31 @ bit position
.endif
- .ifeq \set
- eor r3, r3, #0xff @ now looking for a 1 bit
.endif
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
+ orr r2, r2, #31 @ no zero bits
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
ENDPROC(_find_next_\name\()bit_\endian)
@@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian)
/*
* One or more bits in the LSB of r3 are assumed to be set.
*/
+.L_found_swab:
+ rev_l r3, ip
.L_found:
#if __LINUX_ARM_ARCH__ >= 7
rbit r3, r3 @ reverse bits
@@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian)
rsb r3, r3, #31 @ offset of first set bit
add r0, r2, r3 @ add offset of first set bit
#else
- tst r3, #0x0f
+ mov ip, #~0
+ tst r3, ip, lsr #16 @ test bits 0-15
+ addeq r2, r2, #16
+ moveq r3, r3, lsr #16
+ tst r3, #0x00ff
+ addeq r2, r2, #8
+ moveq r3, r3, lsr #8
+ tst r3, #0x000f
addeq r2, r2, #4
- movne r3, r3, lsl #4
- tst r3, #0x30
+ moveq r3, r3, lsr #4
+ tst r3, #0x0003
addeq r2, r2, #2
- movne r3, r3, lsl #2
- tst r3, #0x40
+ moveq r3, r3, lsr #2
+ tst r3, #0x0001
addeq r2, r2, #1
mov r0, r2
#endif
--
2.30.2