2019-12-02 08:00:21

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 0/8] powerpc/vdso32 enhancement and optimisation

This series:
- adds getcpu() on non SMP ppc32
- adds coarse clocks in clock_gettime
- fixes and adds all clocks in clock_getres
- optimises the retrieval of the datapage address
- optimises the cache functions

v4:
- Rebased on top of ceb307474506 ("Merge tag 'y2038-cleanups-5.5' of git://git.kernel.org:/pub/scm/linux/kernel/git/arnd/playground")
- Fixed build failure with old binutils reported by mpe (patch 4)

v3:
- Dropped the 'fast syscall' hack for getcpu() on SMP.
- Moved get_datapage macro into asm/vdso_datapage.h so that it can be used on PPC64 as well.

v2:
- Used named labels in patch 2
- Added patch from Vincenzo to fix clock_getres() (patch 3)
- Removed unnecessary label in patch 4 as suggested by Segher
- Added patches 5 to 8

Christophe Leroy (8):
powerpc/32: Add VDSO version of getcpu on non SMP
powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
powerpc: Fix vDSO clock_getres()
powerpc/vdso32: inline __get_datapage()
powerpc/vdso32: Don't read cache line size from the datapage on PPC32.
powerpc/vdso32: use LOAD_REG_IMMEDIATE()
powerpc/vdso32: implement clock_getres entirely
powerpc/vdso32: miscellaneous optimisations

arch/powerpc/include/asm/vdso_datapage.h | 16 +++-
arch/powerpc/kernel/asm-offsets.c | 7 +-
arch/powerpc/kernel/time.c | 1 +
arch/powerpc/kernel/vdso.c | 5 --
arch/powerpc/kernel/vdso32/Makefile | 4 +-
arch/powerpc/kernel/vdso32/cacheflush.S | 32 ++++++--
arch/powerpc/kernel/vdso32/datapage.S | 31 +-------
arch/powerpc/kernel/vdso32/getcpu.S | 23 +++++-
arch/powerpc/kernel/vdso32/gettimeofday.S | 124 +++++++++++++++++++++---------
arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 +-
arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +-
11 files changed, 164 insertions(+), 88 deletions(-)

--
2.13.3


2019-12-02 08:01:54

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v4 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE

This is copied and adapted from commit 5c929885f1bb ("powerpc/vdso64:
Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE")
from Santosh Sivaraj <[email protected]>

Benchmark from vdsotest-all:
clock-gettime-realtime: syscall: 3601 nsec/call
clock-gettime-realtime: libc: 1072 nsec/call
clock-gettime-realtime: vdso: 931 nsec/call
clock-gettime-monotonic: syscall: 4034 nsec/call
clock-gettime-monotonic: libc: 1213 nsec/call
clock-gettime-monotonic: vdso: 1076 nsec/call
clock-gettime-realtime-coarse: syscall: 2722 nsec/call
clock-gettime-realtime-coarse: libc: 805 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: syscall: 2949 nsec/call
clock-gettime-monotonic-coarse: libc: 882 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call

Additional test passed with:
vdsotest -d 30 clock-gettime-monotonic-coarse verify

Signed-off-by: Christophe Leroy <[email protected]>
Cc: Naveen N. Rao <[email protected]>
Cc: Santosh Sivaraj <[email protected]>
Link: https://github.com/linuxppc/issues/issues/41
---
v4: Using STAMP_XTIME_SEC and STAMP_XTIMER_NSEC instead of STAMP_XTIME following merge of latest 2038 fixing series.
---
arch/powerpc/kernel/vdso32/gettimeofday.S | 64 +++++++++++++++++++++++++++----
1 file changed, 57 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index c8e6902cb01b..7c1be86c1e90 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -69,7 +69,13 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpli cr0,r3,CLOCK_REALTIME
cmpli cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+
+ cmpli cr5,r3,CLOCK_REALTIME_COARSE
+ cmpli cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
+ bne cr0, .Lgettime_fallback

mflr r12 /* r12 saves lr */
.cfi_register lr,r12
@@ -78,8 +84,10 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mr r9,r3 /* datapage ptr in r9 */
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
-50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1,80f /* not monotonic -> all done */
+ beq cr5, .Lcoarse_clocks
+.Lprecise_clocks:
+ bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
+ bne cr1, .Lfinish /* not monotonic -> all done */

/*
* CLOCK_MONOTONIC
@@ -103,12 +111,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
add r9,r9,r0
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
- bne- 50b
+ bne- .Lprecise_clocks
+ b .Lfinish_monotonic
+
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
+ */
+.Lcoarse_clocks:
+ lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- .Lcoarse_clocks
+ add r9,r9,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ lwz r3,STAMP_XTIME_SEC+LOPART(r9)
+ lwz r4,STAMP_XTIME_NSEC+LOPART(r9)
+ bne cr6,1f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
+ lwz r6,WTOM_CLOCK_NSEC(r9)
+
+ /* check if counter has updated */
+ or r0,r6,r5
+1: or r0,r0,r3
+ or r0,r0,r4
+ xor r0,r0,r0
+ add r3,r3,r0
+ lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
+ cmpl cr0,r0,r8 /* check if updated */
+ bne- .Lcoarse_clocks
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6, .Lfinish

/* Calculate and store result. Note that this mimics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
+.Lfinish_monotonic:
add r3,r3,r5
add r4,r4,r6
cmpw cr0,r4,r7
@@ -116,11 +165,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
blt 1f
subf r4,r7,r4
addi r3,r3,1
-1: bge cr1,80f
+1: bge cr1, .Lfinish
addi r3,r3,-1
add r4,r4,r7

-80: stw r3,TSPC32_TV_SEC(r11)
+.Lfinish:
+ stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)

mtlr r12
@@ -131,7 +181,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/*
* syscall fallback
*/
-99:
+.Lgettime_fallback:
li r0,__NR_clock_gettime
.cfi_restore lr
sc
--
2.13.3