2019-12-23 14:32:58

by Christophe Leroy

Subject: [RFC PATCH v2 04/10] lib: vdso: get pointer to vdso data from the arch

On powerpc, __arch_get_vdso_data() clobbers the link register,
requiring the caller to set a stack frame in order to save it.

As the parent function already has to set a stack frame and save
the link register to call the C vdso function, retrieving the
vdso data pointer there is lighter.

Give arches the opportunity to hand the vdso data pointer
to C vdso functions.

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/arm/vdso/vgettimeofday.c | 12 ++++++++----
arch/arm64/kernel/vdso/vgettimeofday.c | 9 ++++++---
arch/arm64/kernel/vdso32/vgettimeofday.c | 12 ++++++++----
arch/mips/vdso/vgettimeofday.c | 21 ++++++++++++++-------
arch/x86/entry/vdso/vclock_gettime.c | 22 +++++++++++++++-------
lib/vdso/gettimeofday.c | 28 ++++++++++++++--------------
6 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index 5451afb715e6..efad7d508d06 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -10,7 +10,8 @@
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
- int ret = __cvdso_clock_gettime32(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime32(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -21,7 +22,8 @@ int __vdso_clock_gettime(clockid_t clock,
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -32,7 +34,8 @@ int __vdso_clock_gettime64(clockid_t clock,
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- int ret = __cvdso_gettimeofday(tv, tz);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_gettimeofday(vd, tv, tz);

if (likely(!ret))
return ret;
@@ -43,7 +46,8 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
- int ret = __cvdso_clock_getres_time32(clock_id, res);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_getres_time32(vd, clock_id, res);

if (likely(!ret))
return ret;
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
index 62694876b216..9a7122ec6d17 100644
--- a/arch/arm64/kernel/vdso/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -11,7 +11,8 @@
int __kernel_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -22,7 +23,8 @@ int __kernel_clock_gettime(clockid_t clock,
int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- int ret = __cvdso_gettimeofday(tv, tz);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_gettimeofday(vd, tv, tz);

if (likely(!ret))
return ret;
@@ -33,7 +35,8 @@ int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
int __kernel_clock_getres(clockid_t clock_id,
struct __kernel_timespec *res)
{
- int ret = __cvdso_clock_getres(clock_id, res);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_getres(vd, clock_id, res);

if (likely(!ret))
return ret;
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 6770d2bedd1f..3eb6a82c1c25 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -11,13 +11,14 @@
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
+ const struct vdso_data *vd = __arch_get_vdso_data();
int ret;

/* The checks below are required for ABI consistency with arm */
if ((u32)ts >= TASK_SIZE_32)
return -EFAULT;

- ret = __cvdso_clock_gettime32(clock, ts);
+ ret = __cvdso_clock_gettime32(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -28,13 +29,14 @@ int __vdso_clock_gettime(clockid_t clock,
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
+ const struct vdso_data *vd = __arch_get_vdso_data();
int ret;

/* The checks below are required for ABI consistency with arm */
if ((u32)ts >= TASK_SIZE_32)
return -EFAULT;

- ret = __cvdso_clock_gettime(clock, ts);
+ ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -45,7 +47,8 @@ int __vdso_clock_gettime64(clockid_t clock,
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- int ret = __cvdso_gettimeofday(tv, tz);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_gettimeofday(vd, tv, tz);

if (likely(!ret))
return ret;
@@ -56,6 +59,7 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
+ const struct vdso_data *vd = __arch_get_vdso_data();
int ret;
struct __kernel_timespec ts;

@@ -63,7 +67,7 @@ int __vdso_clock_getres(clockid_t clock_id,
if ((u32)res >= TASK_SIZE_32)
return -EFAULT;

- ret = __cvdso_clock_getres_time32(clock_id, res);
+ ret = __cvdso_clock_getres_time32(vd, clock_id, res);

if (likely(!ret))
return ret;
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
index 02758c58454c..65d79d4695da 100644
--- a/arch/mips/vdso/vgettimeofday.c
+++ b/arch/mips/vdso/vgettimeofday.c
@@ -14,7 +14,8 @@
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
- int ret = __cvdso_clock_gettime32(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime32(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -25,7 +26,8 @@ int __vdso_clock_gettime(clockid_t clock,
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- int ret = __cvdso_gettimeofday(tv, tz);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_gettimeofday(vd, tv, tz);

if (likely(!ret))
return ret;
@@ -36,7 +38,8 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
- int ret = __cvdso_clock_getres_time32(clock_id, res);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_getres_time32(vd, clock_id, res);

if (likely(!ret))
return ret;
@@ -47,7 +50,8 @@ int __vdso_clock_getres(clockid_t clock_id,
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -60,7 +64,8 @@ int __vdso_clock_gettime64(clockid_t clock,
int __vdso_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -71,7 +76,8 @@ int __vdso_clock_gettime(clockid_t clock,
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- int ret = __cvdso_gettimeofday(tv, tz);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_gettimeofday(vd, tv, tz);

if (likely(!ret))
return ret;
@@ -82,7 +88,8 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
int __vdso_clock_getres(clockid_t clock_id,
struct __kernel_timespec *res)
{
- int ret = __cvdso_clock_getres(clock_id, res);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_getres(vd, clock_id, res);

if (likely(!ret))
return ret;
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index fde511cfe284..b87847b660dd 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -19,7 +19,8 @@ extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t);

int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
- int ret = __cvdso_gettimeofday(tv, tz);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_gettimeofday(vd, tv, tz);

if (likely(!ret))
return ret;
@@ -32,7 +33,9 @@ int gettimeofday(struct __kernel_old_timeval *, struct timezone *)

__kernel_old_time_t __vdso_time(__kernel_old_time_t *t)
{
- return __cvdso_time(t);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+
+ return __cvdso_time(vd, t);
}

__kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time")));
@@ -45,7 +48,8 @@ extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);

int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -59,7 +63,8 @@ int clock_gettime(clockid_t, struct __kernel_timespec *)
int __vdso_clock_getres(clockid_t clock,
struct __kernel_timespec *res)
{
- int ret = __cvdso_clock_getres(clock_id, res);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_getres(vd, clock_id, res);

if (likely(!ret))
return ret;
@@ -76,7 +81,8 @@ extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);

int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
- int ret = __cvdso_clock_gettime32(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime32(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -89,7 +95,8 @@ int clock_gettime(clockid_t, struct old_timespec32 *)

int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime(clock, ts);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_gettime(vd, clock, ts);

if (likely(!ret))
return ret;
@@ -102,7 +109,8 @@ int clock_gettime64(clockid_t, struct __kernel_timespec *)

int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
{
- int ret = __cvdso_clock_getres_time32(clock, res);
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ int ret = __cvdso_clock_getres_time32(vd, clock, res);

if (likely(!ret))
return ret;
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index c6eeeb47f446..24e1ba838260 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -13,7 +13,6 @@
/*
* The generic vDSO implementation requires that gettimeofday.h
* provides:
- * - __arch_get_vdso_data(): to get the vdso datapage.
* - __arch_get_hw_counter(): to get the hw counter based on the
* clock_mode.
*/
@@ -79,9 +78,9 @@ static void do_coarse(const struct vdso_data *vd, clockid_t clk,
}

static __maybe_unused int
-__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+__cvdso_clock_gettime(const struct vdso_data *vd, clockid_t clock,
+ struct __kernel_timespec *ts)
{
- const struct vdso_data *vd = __arch_get_vdso_data();
u32 msk;

/* Check for negative values or invalid clocks */
@@ -105,10 +104,11 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
}

static __maybe_unused int
-__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
+__cvdso_clock_gettime32(const struct vdso_data *vd, clockid_t clock,
+ struct old_timespec32 *res)
{
struct __kernel_timespec ts;
- int ret = __cvdso_clock_gettime(clock, &ts);
+ int ret = __cvdso_clock_gettime(vd, clock, &ts);

if (likely(!ret)) {
res->tv_sec = ts.tv_sec;
@@ -118,10 +118,9 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
}

static __maybe_unused int
-__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
+__cvdso_gettimeofday(const struct vdso_data *vd, struct __kernel_old_timeval *tv,
+ struct timezone *tz)
{
- const struct vdso_data *vd = __arch_get_vdso_data();
-
if (likely(tv != NULL)) {
struct __kernel_timespec ts;

@@ -141,9 +140,9 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
}

#ifdef VDSO_HAS_TIME
-static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
+static __maybe_unused __kernel_old_time_t
+__cvdso_time(const struct vdso_data *vd, __kernel_old_time_t *time)
{
- const struct vdso_data *vd = __arch_get_vdso_data();
__kernel_old_time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);

if (time)
@@ -155,9 +154,9 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time

#ifdef VDSO_HAS_CLOCK_GETRES
static __maybe_unused
-int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+int __cvdso_clock_getres(const struct vdso_data *vd, clockid_t clock,
+ struct __kernel_timespec *res)
{
- const struct vdso_data *vd = __arch_get_vdso_data();
u64 hrtimer_res;
u32 msk;
u64 ns;
@@ -199,10 +198,11 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
}

static __maybe_unused int
-__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
+__cvdso_clock_getres_time32(const struct vdso_data *vd, clockid_t clock,
+ struct old_timespec32 *res)
{
struct __kernel_timespec ts;
- int ret = __cvdso_clock_getres(clock, &ts);
+ int ret = __cvdso_clock_getres(vd, clock, &ts);

if (likely(!ret && res)) {
res->tv_sec = ts.tv_sec;
--
2.13.3


2019-12-24 02:28:45

by Andy Lutomirski

Subject: Re: [RFC PATCH v2 04/10] lib: vdso: get pointer to vdso data from the arch

On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
<[email protected]> wrote:
>
> On powerpc, __arch_get_vdso_data() clobbers the link register,
> requiring the caller to set a stack frame in order to save it.
>
> As the parent function already has to set a stack frame and save
> the link register to call the C vdso function, retrieving the
> vdso data pointer there is lighter.

I'm confused. Can't you inline __arch_get_vdso_data()? Or is the
issue that you can't retrieve the program counter on power without
clobbering the link register?

I would imagine that this patch generates worse code on any
architecture with PC-relative addressing modes (which includes at
least x86_64, and I would guess includes most modern architectures).
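
For reference, on an architecture with PC-relative data addressing,
__arch_get_vdso_data() can be nothing more than a reference to a
hidden-visibility symbol, roughly along these lines (simplified sketch,
not the exact in-tree code):

extern struct vdso_data _vdso_data[CS_BASES]
	__attribute__((visibility("hidden")));	/* laid out by the vDSO linker script */

static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
	/* Compiles to a single PC-relative address computation: no call,
	 * no clobbers, nothing for the caller to save. */
	return _vdso_data;
}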

--Andy

2019-12-24 11:54:35

by Christophe Leroy

Subject: Re: [RFC PATCH v2 04/10] lib: vdso: get pointer to vdso data from the arch



On 24/12/2019 at 03:27, Andy Lutomirski wrote:
> On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
> <[email protected]> wrote:
>>
>> On powerpc, __arch_get_vdso_data() clobbers the link register,
>> requiring the caller to set a stack frame in order to save it.
>>
>> As the parent function already has to set a stack frame and save
>> the link register to call the C vdso function, retrieving the
>> vdso data pointer there is lighter.
>
> I'm confused. Can't you inline __arch_get_vdso_data()? Or is the
> issue that you can't retrieve the program counter on power without
> clobbering the link register?

Yes, it can be inlined (I did it in V1,
https://patchwork.ozlabs.org/patch/1180571/), but you can't do it
without clobbering the link register, because the only way to get the
program counter is to do as if you were calling another function, while
branching to the address that immediately follows the branch itself, so
that LR is set to the simulated return address, i.e. the address
following the branch.

static __always_inline
const struct vdso_data *__arch_get_vdso_data(void)
{
	void *ptr;

	asm volatile(
		"	bcl	20, 31, .+4;\n"
		"	mflr	%0;\n"
		"	addi	%0, %0, __kernel_datapage_offset - (.-4);\n"
		: "=b"(ptr) : : "lr");

	return ptr + *(unsigned long *)ptr;
}

>
> I would imagine that this patch generates worse code on any
> architecture with PC-relative addressing modes (which includes at
> least x86_64, and I would guess includes most modern architectures).

Why? Powerpc also uses PC-relative addressing for all calls except
indirect calls.

As the other arches' C VDSO callers are written in such a way that the
callee gets inlined into the caller, and as __arch_get_vdso_data() is
inlined, it should make no difference, should it?
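
I.e., once everything is inlined, the exported wrapper should reduce to
the same thing before and after the patch; a simplified sketch (ignoring
the syscall fallback path of the real wrappers):

/* Before: the generic code fetches the data pointer itself. */
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
	return __cvdso_clock_gettime(clock, ts);
}

/* After: the arch wrapper fetches it and hands it down. */
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
	return __cvdso_clock_gettime(__arch_get_vdso_data(), clock, ts);
}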

Christophe

2019-12-24 12:16:03

by Andy Lutomirski

Subject: Re: [RFC PATCH v2 04/10] lib: vdso: get pointer to vdso data from the arch



> On Dec 24, 2019, at 7:53 PM, christophe leroy <[email protected]> wrote:
>
> 
>
>> On 24/12/2019 at 03:27, Andy Lutomirski wrote:
>>> On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
>>> <[email protected]> wrote:
>>>
>>> On powerpc, __arch_get_vdso_data() clobbers the link register,
>>> requiring the caller to set a stack frame in order to save it.
>>>
>>> As the parent function already has to set a stack frame and save
>>> the link register to call the C vdso function, retrieving the
>>> vdso data pointer there is lighter.
>> I'm confused. Can't you inline __arch_get_vdso_data()? Or is the
>> issue that you can't retrieve the program counter on power without
>> clobbering the link register?
>
> Yes, it can be inlined (I did it in V1, https://patchwork.ozlabs.org/patch/1180571/), but you can't do it without clobbering the link register, because the only way to get the program counter is to do as if you were calling another function, while branching to the address that immediately follows the branch itself, so that LR is set to the simulated return address, i.e. the address following the branch.
>
> static __always_inline
> const struct vdso_data *__arch_get_vdso_data(void)
> {
> 	void *ptr;
>
> 	asm volatile(
> 		"	bcl	20, 31, .+4;\n"
> 		"	mflr	%0;\n"
> 		"	addi	%0, %0, __kernel_datapage_offset - (.-4);\n"
> 		: "=b"(ptr) : : "lr");
>
> 	return ptr + *(unsigned long *)ptr;
> }
>
>> I would imagine that this patch generates worse code on any
>> architecture with PC-relative addressing modes (which includes at
>> least x86_64, and I would guess includes most modern architectures).
>
> Why ? Powerpc is also using PC-relative addressing for all calls but indirect calls.

I mean PC-relative access for data. The data page is at a constant, known offset from the vDSO text.

I haven’t checked how much x86_64 benefits from this, but at least the non-array fields ought to be accessible with a PC-relative access.

It should be possible to refactor a little bit so that the compiler can still see what’s going on. Maybe your patch actually does this. I’d want to look at the assembly. This also might not matter much on x86_64 in particular, since x86_64 can convert a PC-relative address to an absolute address with a single instruction with no clobbers.
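
Concretely (hypothetical sketch, assuming a hidden-visibility _vdso_data
symbol; not actual vDSO code):

/* A read the compiler can keep PC-relative when it sees the symbol: */
static __always_inline u32 coarse_seq_pcrel(void)
{
	return READ_ONCE(_vdso_data[CS_HRES_COARSE].seq);
}

/* The same read through a pointer argument, which first forces the
 * absolute base address to be materialized: */
static __always_inline u32 coarse_seq_ptr(const struct vdso_data *vd)
{
	return READ_ONCE(vd[CS_HRES_COARSE].seq);
}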

Does power have PC-relative data access? If so, I wonder if the code can be arranged so that even the array accesses don’t require computing an absolute address at any point.

2019-12-24 12:42:53

by Andy Lutomirski

Subject: Re: [RFC PATCH v2 04/10] lib: vdso: get pointer to vdso data from the arch

On Tue, Dec 24, 2019 at 4:15 AM Andy Lutomirski <[email protected]> wrote:
>
>
>
> > On Dec 24, 2019, at 7:53 PM, christophe leroy <[email protected]> wrote:
> >
> > 
> >
> >> On 24/12/2019 at 03:27, Andy Lutomirski wrote:
> >>> On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
> >>> <[email protected]> wrote:
> >>>
> >>> On powerpc, __arch_get_vdso_data() clobbers the link register,
> >>> requiring the caller to set a stack frame in order to save it.
> >>>
> >>> As the parent function already has to set a stack frame and save
> >>> the link register to call the C vdso function, retrieving the
> >>> vdso data pointer there is lighter.
> >> I'm confused. Can't you inline __arch_get_vdso_data()? Or is the
> >> issue that you can't retrieve the program counter on power without
> >> clobbering the link register?
> >
> > Yes, it can be inlined (I did it in V1, https://patchwork.ozlabs.org/patch/1180571/), but you can't do it without clobbering the link register, because the only way to get the program counter is to do as if you were calling another function, while branching to the address that immediately follows the branch itself, so that LR is set to the simulated return address, i.e. the address following the branch.
> >
> > static __always_inline
> > const struct vdso_data *__arch_get_vdso_data(void)
> > {
> > 	void *ptr;
> >
> > 	asm volatile(
> > 		"	bcl	20, 31, .+4;\n"
> > 		"	mflr	%0;\n"
> > 		"	addi	%0, %0, __kernel_datapage_offset - (.-4);\n"
> > 		: "=b"(ptr) : : "lr");
> >
> > 	return ptr + *(unsigned long *)ptr;
> > }
> >
> >> I would imagine that this patch generates worse code on any
> >> architecture with PC-relative addressing modes (which includes at
> >> least x86_64, and I would guess includes most modern architectures).
> >
> > Why ? Powerpc is also using PC-relative addressing for all calls but indirect calls.
>
> I mean PC-relative access for data. The data page is at a constant, known offset from the vDSO text.
>
> I haven’t checked how much x86_64 benefits from this, but at least the non-array fields ought to be accessible with a PC-relative access.
>
> It should be possible to refactor a little bit so that the compiler can still see what’s going on. Maybe your patch actually does this. I’d want to look at the assembly. This also might not matter much on x86_64 in particular, since x86_64 can convert a PC-relative address to an absolute address with a single instruction with no clobbers.
>
> Does power have PC-relative data access? If so, I wonder if the code can be arranged so that even the array accesses don’t require computing an absolute address at any point.

Indeed the x86 code is also suboptimal, but at least the unnecessary
absolute address calculation is cheap on x86_64. Ideally we'd pass
around offsets into the vdso data instead of passing pointers, and
maybe the compiler will figure it out. I can try to play with this in
the morning.
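
Roughly this shape, i.e. pass an index into the vdso data rather than a
pointer and always go through the hidden symbol (hypothetical sketch,
invented name, assuming a hidden-visibility _vdso_data symbol):

static __always_inline u32 vdso_read_begin_idx(unsigned int cs)
{
	u32 seq;

	/* If cs is a compile-time constant after inlining (in the generic
	 * code the clock mask effectively selects CS_HRES_COARSE or CS_RAW),
	 * each load below is symbol+constant and can stay PC-relative. */
	while (unlikely((seq = READ_ONCE(_vdso_data[cs].seq)) & 1))
		cpu_relax();

	smp_rmb();
	return seq;
}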

2019-12-24 14:47:59

by Segher Boessenkool

Subject: Re: [RFC PATCH v2 04/10] lib: vdso: get pointer to vdso data from the arch

On Tue, Dec 24, 2019 at 08:15:11PM +0800, Andy Lutomirski wrote:
> Does power have PC-relative data access? If so, I wonder if the code can be arranged so that even the array accesses don’t require computing an absolute address at any point.

Not before ISA 3.0 (that is Power9).

The bcl/mflr dance isn't *that* expensive, if you use it sparingly.


Segher