2019-08-22 21:02:59

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.

By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.

The improvement is noticeable (about 55 nsec/call on an 8xx)

vdsotest before the patch:
gettimeofday: vdso: 731 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call

vdsotest after the patch:
gettimeofday: vdso: 677 nsec/call
clock-gettime-realtime-coarse: vdso: 613 nsec/call
clock-gettime-monotonic-coarse: vdso: 690 nsec/call

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
4 files changed, 26 insertions(+), 37 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso32/datapage.h

diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..e9453837e4ee 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -10,6 +10,8 @@
#include <asm/vdso.h>
#include <asm/asm-offsets.h>

+#include "datapage.h"
+
.text

/*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10, r0
mtlr r12
- mr r10,r3

lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)

lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..d480d2d4a3fe 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -11,34 +11,13 @@
#include <asm/unistd.h>
#include <asm/vdso.h>

+#include "datapage.h"
+
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0

-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
mflr r12
.cfi_register lr,r12
mr r4,r3
- bl __get_datapage@local
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
cmpli cr0,r4,0
@@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3, r0
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
new file mode 100644
index 000000000000..74f4f57c2da8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+ bcl 20,31,.+4
+ mflr \ptr
+ addi \ptr, \ptr, __kernel_datapage_offset - (.-4)
+ lwz \tmp, 0(\ptr)
+ add \ptr, \tmp, \ptr
+.endm
+
+
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 355b537d327a..3e55cba19f44 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,6 +12,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>

+#include "datapage.h"
+
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
@@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)

mr r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
cmplwi r10,0 /* check if tv is NULL */
beq 3f
lis r7,1000000@ha /* load up USEC_PER_SEC */
@@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
beq cr5, .Lcoarse_clocks
@@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)

mflr r12
.cfi_register lr,r12
- bl __get_datapage@local /* get data page */
+ get_datapage r3, r0
lwz r5, CLOCK_HRTIMER_RES(r3)
mtlr r12
li r3,0
@@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12

mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0

lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)

--
2.13.3


2019-08-26 05:47:32

by Santosh Sivaraj

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

Hi Christophe,

Christophe Leroy <[email protected]> writes:

> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
>
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
>
> The improvement is noticeable (about 55 nsec/call on an 8xx)
>
> vdsotest before the patch:
> gettimeofday: vdso: 731 nsec/call
> clock-gettime-realtime-coarse: vdso: 668 nsec/call
> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>
> vdsotest after the patch:
> gettimeofday: vdso: 677 nsec/call
> clock-gettime-realtime-coarse: vdso: 613 nsec/call
> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
> 4 files changed, 26 insertions(+), 37 deletions(-)
> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h

The datapage.h file should ideally be moved under include/asm, then we can use
the same for powerpc64 too.

Santosh

>
> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
> index 7f882e7b9f43..e9453837e4ee 100644
> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
> @@ -10,6 +10,8 @@
> #include <asm/vdso.h>
> #include <asm/asm-offsets.h>
>
> +#include "datapage.h"
> +
> .text
>
> /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - mr r11,r3
> - bl __get_datapage@local
> + get_datapage r10, r0
> mtlr r12
> - mr r10,r3
>
> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5 /* ensure we get enough */
> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>
> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5
> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
> index 6984125b9fc0..d480d2d4a3fe 100644
> --- a/arch/powerpc/kernel/vdso32/datapage.S
> +++ b/arch/powerpc/kernel/vdso32/datapage.S
> @@ -11,34 +11,13 @@
> #include <asm/unistd.h>
> #include <asm/vdso.h>
>
> +#include "datapage.h"
> +
> .text
> .global __kernel_datapage_offset;
> __kernel_datapage_offset:
> .long 0
>
> -V_FUNCTION_BEGIN(__get_datapage)
> - .cfi_startproc
> - /* We don't want that exposed or overridable as we want other objects
> - * to be able to bl directly to here
> - */
> - .protected __get_datapage
> - .hidden __get_datapage
> -
> - mflr r0
> - .cfi_register lr,r0
> -
> - bcl 20,31,data_page_branch
> -data_page_branch:
> - mflr r3
> - mtlr r0
> - addi r3, r3, __kernel_datapage_offset-data_page_branch
> - lwz r0,0(r3)
> - .cfi_restore lr
> - add r3,r0,r3
> - blr
> - .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
> /*
> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
> *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
> mflr r12
> .cfi_register lr,r12
> mr r4,r3
> - bl __get_datapage@local
> + get_datapage r3, r0
> mtlr r12
> addi r3,r3,CFG_SYSCALL_MAP32
> cmpli cr0,r4,0
> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - bl __get_datapage@local
> + get_datapage r3, r0
> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
> mtlr r12
> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
> new file mode 100644
> index 000000000000..74f4f57c2da8
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso32/datapage.h
> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> + bcl 20,31,.+4
> + mflr \ptr
> + addi \ptr, \ptr, __kernel_datapage_offset - (.-4)
> + lwz \tmp, 0(\ptr)
> + add \ptr, \tmp, \ptr
> +.endm
> +
> +
> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
> index 355b537d327a..3e55cba19f44 100644
> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
> @@ -12,6 +12,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +#include "datapage.h"
> +
> /* Offset for the low 32-bit part of a field of long type */
> #ifdef CONFIG_PPC64
> #define LOPART 4
> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>
> mr r10,r3 /* r10 saves tv */
> mr r11,r4 /* r11 saves tz */
> - bl __get_datapage@local /* get data page */
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> cmplwi r10,0 /* check if tv is NULL */
> beq 3f
> lis r7,1000000@ha /* load up USEC_PER_SEC */
> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
> mflr r12 /* r12 saves lr */
> .cfi_register lr,r12
> mr r11,r4 /* r11 saves tp */
> - bl __get_datapage@local /* get data page */
> - mr r9,r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> ori r7,r7,NSEC_PER_SEC@l
> beq cr5, .Lcoarse_clocks
> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>
> mflr r12
> .cfi_register lr,r12
> - bl __get_datapage@local /* get data page */
> + get_datapage r3, r0
> lwz r5, CLOCK_HRTIMER_RES(r3)
> mtlr r12
> li r3,0
> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
> .cfi_register lr,r12
>
> mr r11,r3 /* r11 holds t */
> - bl __get_datapage@local
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
>
> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>
> --
> 2.13.3

2019-09-13 11:33:23

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

Hi Santosh,

Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
>
> Christophe Leroy <[email protected]> writes:
>
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday: vdso: 731 nsec/call
>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday: vdso: 677 nsec/call
>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>> ---
>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>> 4 files changed, 26 insertions(+), 37 deletions(-)
>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.

I have a more ambitious project indeed.

Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
aiming at merging everything into a single source code.

This means we would have to generate vdso32.so and vdso64.so out of the
same source files. Any idea on how to do that ? I'm not too good at
creating Makefiles. I guess we would have everything in
arch/powerpc/kernel/vdso/ and would have to build the objects twice,
once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/

Christophe

2019-09-13 13:36:51

by Santosh Sivaraj

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

Christophe Leroy <[email protected]> writes:

> Hi Santosh,
>
> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>> Hi Christophe,
>>
>> Christophe Leroy <[email protected]> writes:
>>
>>> __get_datapage() is only a few instructions to retrieve the
>>> address of the page where the kernel stores data to the VDSO.
>>>
>>> By inlining this function into its users, a bl/blr pair and
>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>
>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>
>>> vdsotest before the patch:
>>> gettimeofday: vdso: 731 nsec/call
>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>>
>>> vdsotest after the patch:
>>> gettimeofday: vdso: 677 nsec/call
>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>>
>>> Signed-off-by: Christophe Leroy <[email protected]>
>>> ---
>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>> 4 files changed, 26 insertions(+), 37 deletions(-)
>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>
>> The datapage.h file should ideally be moved under include/asm, then we can use
>> the same for powerpc64 too.
>
> I have a more ambitious project indeed.
>
> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
> aiming at merging everything into a single source code.
>
> This means we would have to generate vdso32.so and vdso64.so out of the
> same source files. Any idea on how to do that ? I'm not too good at
> creating Makefiles. I guess we would have everything in
> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/

Should we need to build the objects twice? For 64 bit config it is going to be
a 64 bit build else a 32 bit build. It should suffice to get the single source
code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
compilation. Am I missing something when you say build twice?

Thanks,
Santosh

2019-09-13 13:52:25

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()



Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit :
> Christophe Leroy <[email protected]> writes:
>
>> Hi Santosh,
>>
>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>>> Hi Christophe,
>>>
>>> Christophe Leroy <[email protected]> writes:
>>>
>>>> __get_datapage() is only a few instructions to retrieve the
>>>> address of the page where the kernel stores data to the VDSO.
>>>>
>>>> By inlining this function into its users, a bl/blr pair and
>>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>>
>>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>>
>>>> vdsotest before the patch:
>>>> gettimeofday: vdso: 731 nsec/call
>>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>>>
>>>> vdsotest after the patch:
>>>> gettimeofday: vdso: 677 nsec/call
>>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>>>
>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>> ---
>>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>> 4 files changed, 26 insertions(+), 37 deletions(-)
>>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>>
>>> The datapage.h file should ideally be moved under include/asm, then we can use
>>> the same for powerpc64 too.
>>
>> I have a more ambitious project indeed.
>>
>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
>> aiming at merging everything into a single source code.
>>
>> This means we would have to generate vdso32.so and vdso64.so out of the
>> same source files. Any idea on how to do that ? I'm not too good at
>> creating Makefiles. I guess we would have everything in
>> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
>
> Should we need to build the objects twice? For 64 bit config it is going to be
> a 64 bit build else a 32 bit build. It should suffice to get the single source
> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
> compilation. Am I missing something when you say build twice?
>

IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for
32bits user apps.

In arch/powerpc/kernel/Makefile, you have:

obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC64) += vdso64/

And in arch/powerpc/platforms/Kconfig.cputype, you have:

config VDSO32
def_bool y
depends on PPC32 || CPU_BIG_ENDIAN
help
This symbol controls whether we build the 32-bit VDSO. We obviously
want to do that if we're building a 32-bit kernel. If we're building
a 64-bit kernel then we only want a 32-bit VDSO if we're building for
big endian. That is because the only little endian configuration we
support is ppc64le which is 64-bit only.




Christophe

2019-09-13 14:06:01

by Santosh Sivaraj

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

Christophe Leroy <[email protected]> writes:

> Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit :
>> Christophe Leroy <[email protected]> writes:
>>
>>> Hi Santosh,
>>>
>>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>>>> Hi Christophe,
>>>>
>>>> Christophe Leroy <[email protected]> writes:
>>>>
>>>>> __get_datapage() is only a few instructions to retrieve the
>>>>> address of the page where the kernel stores data to the VDSO.
>>>>>
>>>>> By inlining this function into its users, a bl/blr pair and
>>>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>>>
>>>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>>>
>>>>> vdsotest before the patch:
>>>>> gettimeofday: vdso: 731 nsec/call
>>>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>>>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>>>>
>>>>> vdsotest after the patch:
>>>>> gettimeofday: vdso: 677 nsec/call
>>>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>>>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>>>>
>>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>>> ---
>>>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>>>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>>>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>>>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>>> 4 files changed, 26 insertions(+), 37 deletions(-)
>>>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>>>
>>>> The datapage.h file should ideally be moved under include/asm, then we can use
>>>> the same for powerpc64 too.
>>>
>>> I have a more ambitious project indeed.
>>>
>>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
>>> aiming at merging everything into a single source code.
>>>
>>> This means we would have to generate vdso32.so and vdso64.so out of the
>>> same source files. Any idea on how to do that ? I'm not too good at
>>> creating Makefiles. I guess we would have everything in
>>> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
>>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
>>
>> Should we need to build the objects twice? For 64 bit config it is going to be
>> a 64 bit build else a 32 bit build. It should suffice to get the single source
>> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
>> compilation. Am I missing something when you say build twice?
>>
>
> IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for
> 32bits user apps.
>
> In arch/powerpc/kernel/Makefile, you have:
>
> obj-$(CONFIG_VDSO32) += vdso32/
> obj-$(CONFIG_PPC64) += vdso64/
>
> And in arch/powerpc/platforms/Kconfig.cputype, you have:
>
> config VDSO32
> def_bool y
> depends on PPC32 || CPU_BIG_ENDIAN
> help
> This symbol controls whether we build the 32-bit VDSO. We obviously
> want to do that if we're building a 32-bit kernel. If we're building
> a 64-bit kernel then we only want a 32-bit VDSO if we're building for
> big endian. That is because the only little endian configuration we
> support is ppc64le which is 64-bit only.
>

I didn't know we build 32 bit vdso for 64 bit big endians. But I don't think
its difficult to do it, might be a bit tricky. We can have two targets from
the same source.

SRC = vdso/*.c
OBJS_32 = $(SRC:.c=vdso32/.o)
OBJS_64 = $(SRC:.c=vdso64/.o)

Something like this would work. Of course, this is out of memory, might have to
do something slightly different for the Makefiles in kernel.

Thanks,
Santosh

>
>
>
> Christophe

2019-10-29 16:15:17

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

Hi Santosh,

Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
>
> Christophe Leroy <[email protected]> writes:
>
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday: vdso: 731 nsec/call
>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday: vdso: 677 nsec/call
>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>> ---
>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>> 4 files changed, 26 insertions(+), 37 deletions(-)
>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.

Finally, I added the get_datapage macro to the existing asm/vdso_datapage.h

Christophe

>
> Santosh
>
>>
>> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
>> index 7f882e7b9f43..e9453837e4ee 100644
>> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
>> @@ -10,6 +10,8 @@
>> #include <asm/vdso.h>
>> #include <asm/asm-offsets.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>>
>> /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - mr r11,r3
>> - bl __get_datapage@local
>> + get_datapage r10, r0
>> mtlr r12
>> - mr r10,r3
>>
>> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5 /* ensure we get enough */
>> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>
>> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5
>> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
>> index 6984125b9fc0..d480d2d4a3fe 100644
>> --- a/arch/powerpc/kernel/vdso32/datapage.S
>> +++ b/arch/powerpc/kernel/vdso32/datapage.S
>> @@ -11,34 +11,13 @@
>> #include <asm/unistd.h>
>> #include <asm/vdso.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>> .global __kernel_datapage_offset;
>> __kernel_datapage_offset:
>> .long 0
>>
>> -V_FUNCTION_BEGIN(__get_datapage)
>> - .cfi_startproc
>> - /* We don't want that exposed or overridable as we want other objects
>> - * to be able to bl directly to here
>> - */
>> - .protected __get_datapage
>> - .hidden __get_datapage
>> -
>> - mflr r0
>> - .cfi_register lr,r0
>> -
>> - bcl 20,31,data_page_branch
>> -data_page_branch:
>> - mflr r3
>> - mtlr r0
>> - addi r3, r3, __kernel_datapage_offset-data_page_branch
>> - lwz r0,0(r3)
>> - .cfi_restore lr
>> - add r3,r0,r3
>> - blr
>> - .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>> /*
>> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>> *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>> mflr r12
>> .cfi_register lr,r12
>> mr r4,r3
>> - bl __get_datapage@local
>> + get_datapage r3, r0
>> mtlr r12
>> addi r3,r3,CFG_SYSCALL_MAP32
>> cmpli cr0,r4,0
>> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - bl __get_datapage@local
>> + get_datapage r3, r0
>> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
>> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
>> mtlr r12
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
>> new file mode 100644
>> index 000000000000..74f4f57c2da8
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso32/datapage.h
>> @@ -0,0 +1,11 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp
>> + bcl 20,31,.+4
>> + mflr \ptr
>> + addi \ptr, \ptr, __kernel_datapage_offset - (.-4)
>> + lwz \tmp, 0(\ptr)
>> + add \ptr, \tmp, \ptr
>> +.endm
>> +
>> +
>> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> index 355b537d327a..3e55cba19f44 100644
>> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> @@ -12,6 +12,8 @@
>> #include <asm/asm-offsets.h>
>> #include <asm/unistd.h>
>>
>> +#include "datapage.h"
>> +
>> /* Offset for the low 32-bit part of a field of long type */
>> #ifdef CONFIG_PPC64
>> #define LOPART 4
>> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>
>> mr r10,r3 /* r10 saves tv */
>> mr r11,r4 /* r11 saves tz */
>> - bl __get_datapage@local /* get data page */
>> - mr r9, r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>> cmplwi r10,0 /* check if tv is NULL */
>> beq 3f
>> lis r7,1000000@ha /* load up USEC_PER_SEC */
>> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>> mflr r12 /* r12 saves lr */
>> .cfi_register lr,r12
>> mr r11,r4 /* r11 saves tp */
>> - bl __get_datapage@local /* get data page */
>> - mr r9,r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
>> ori r7,r7,NSEC_PER_SEC@l
>> beq cr5, .Lcoarse_clocks
>> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>>
>> mflr r12
>> .cfi_register lr,r12
>> - bl __get_datapage@local /* get data page */
>> + get_datapage r3, r0
>> lwz r5, CLOCK_HRTIMER_RES(r3)
>> mtlr r12
>> li r3,0
>> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>> .cfi_register lr,r12
>>
>> mr r11,r3 /* r11 holds t */
>> - bl __get_datapage@local
>> - mr r9, r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>>
>> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>>
>> --
>> 2.13.3