2020-01-13 17:11:56

by Christophe Leroy

[permalink] [raw]
Subject: [RFC PATCH v3 08/12] lib: vdso: allow arches to provide vdso data pointer

On powerpc, __arch_get_vdso_data() clobbers the link register,
requiring the caller to save it.

As the parent function already has to set up a stack frame and save
the link register before calling the C vdso function, retrieving the
vdso data pointer there is lighter.

Give arches the opportunity to hand the vdso data pointer
to C vdso functions.

Signed-off-by: Christophe Leroy <[email protected]>
---
lib/vdso/gettimeofday.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index da15a8842825..ea1a55507af5 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -104,9 +104,15 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
}

static __maybe_unused int
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+#else
__cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
{
const struct vdso_data *vd = __arch_get_vdso_data();
+#endif
u32 msk;

/* Check for negative values or invalid clocks */
@@ -131,9 +137,16 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
}

static __maybe_unused int
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+__cvdso_clock_gettime(const struct vdso_data *vd, clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ int ret = __cvdso_clock_gettime_common(vd, clock, ts);
+#else
__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
int ret = __cvdso_clock_gettime_common(clock, ts);
+#endif

if (unlikely(ret))
return clock_gettime_fallback(clock, ts);
@@ -141,12 +154,21 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
}

static __maybe_unused int
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+__cvdso_clock_gettime32(const struct vdso_data *vd, clockid_t clock,
+ struct old_timespec32 *res)
+#else
__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
+#endif
{
struct __kernel_timespec ts;
int ret;

+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+ ret = __cvdso_clock_gettime_common(vd, clock, &ts);
+#else
ret = __cvdso_clock_gettime_common(clock, &ts);
+#endif

#ifdef VDSO_HAS_32BIT_FALLBACK
if (unlikely(ret))
@@ -164,9 +186,15 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
}

static __maybe_unused int
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+__cvdso_gettimeofday(const struct vdso_data *vd, struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+#else
__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
const struct vdso_data *vd = __arch_get_vdso_data();
+#endif

if (likely(tv != NULL)) {
struct __kernel_timespec ts;
@@ -187,9 +215,15 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
}

#ifdef VDSO_HAS_TIME
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+static __maybe_unused __kernel_old_time_t
+__cvdso_time(const struct vdso_data *vd, __kernel_old_time_t *time)
+{
+#else
static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
{
const struct vdso_data *vd = __arch_get_vdso_data();
+#endif
__kernel_old_time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);

if (time)
@@ -201,9 +235,15 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time

#ifdef VDSO_HAS_CLOCK_GETRES
static __maybe_unused
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock,
+ struct __kernel_timespec *res)
+{
+#else
int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
{
const struct vdso_data *vd = __arch_get_vdso_data();
+#endif
u32 msk;
u64 ns;

@@ -238,9 +278,16 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
}

static __maybe_unused
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+int __cvdso_clock_getres(const struct vdso_data *vd, clockid_t clock,
+ struct __kernel_timespec *res)
+{
+ int ret = __cvdso_clock_getres_common(vd, clock, res);
+#else
int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
{
int ret = __cvdso_clock_getres_common(clock, res);
+#endif

if (unlikely(ret))
return clock_getres_fallback(clock, res);
@@ -248,12 +295,21 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
}

static __maybe_unused int
+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+__cvdso_clock_getres_time32(const struct vdso_data *vd, clockid_t clock,
+ struct old_timespec32 *res)
+#else
__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
+#endif
{
struct __kernel_timespec ts;
int ret;

+#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
+ ret = __cvdso_clock_getres_common(vd, clock, &ts);
+#else
ret = __cvdso_clock_getres_common(clock, &ts);
+#endif

#ifdef VDSO_HAS_32BIT_FALLBACK
if (unlikely(ret))
--
2.13.3


2020-01-14 23:08:22

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [RFC PATCH v3 08/12] lib: vdso: allow arches to provide vdso data pointer

Christophe Leroy <[email protected]> writes:
>
> static __maybe_unused int
> +#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
> +__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
> + struct __kernel_timespec *ts)
> +{
> +#else
> __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
> {
> const struct vdso_data *vd = __arch_get_vdso_data();
> +#endif
> u32 msk;

If we do that, then there is no point in propagating this to the inner
functions. It's perfectly fine to have this distinction at the outermost
level.

As a related question, I noticed that you keep all that ASM voodoo in
the PPC specific code which provides the actual entry points. Is that
ASM code really still necessary? All current users of the generic VDSO
just do something like:

int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
return __cvdso_clock_gettime(clock, ts);
}

in the architecture code. Is there a reason why this can't work on PPC?

Thanks,

tglx

2020-01-15 06:17:15

by Christophe Leroy

[permalink] [raw]
Subject: Re: [RFC PATCH v3 08/12] lib: vdso: allow arches to provide vdso data pointer



Le 15/01/2020 à 00:06, Thomas Gleixner a écrit :
> Christophe Leroy <[email protected]> writes:
>>
>> static __maybe_unused int
>> +#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
>> +__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
>> + struct __kernel_timespec *ts)
>> +{
>> +#else
>> __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
>> {
>> const struct vdso_data *vd = __arch_get_vdso_data();
>> +#endif
>> u32 msk;
>
> If we do that, then there is no point in propagating this to the inner
> functions. It's perfectly fine to have this distinction at the outermost
> level.

In v2, I did it at the arch level (see
https://patchwork.ozlabs.org/patch/1214983/). Andy was concerned about
it being suboptimal for arches which (unlike powerpc) have a PC-relative
data addressing mode.

Wouldn't it be the same issue if doing it at the outermost level of
generic VDSO ?

>
> As a related question, I noticed that you keep all that ASM voodoo in
> the PPC specific code which provides the actual entry points. Is that
> ASM code really still necessary? All current users of the generic VDSO
> just do something like:
>
> int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
> {
> return __cvdso_clock_gettime(clock, ts);
> }
>
> in the architecture code. Is there a reason why this can't work on PPC?

The problem with powerpc is that VDSO functions have to (just like
system calls) set the SO bit in the CR register in case of error, or clear
it if there is no error. There is no way to do that from the C function,
because there is no way to tell GCC not to touch the CR register on function
return.

Refer to the discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92769

Christophe

2020-01-16 09:18:40

by Christophe Leroy

[permalink] [raw]
Subject: Re: [RFC PATCH v3 08/12] lib: vdso: allow arches to provide vdso data pointer

Thomas, Andy,

Le 15/01/2020 à 07:15, Christophe Leroy a écrit :
>
>
> Le 15/01/2020 à 00:06, Thomas Gleixner a écrit :
>> Christophe Leroy <[email protected]> writes:
>>>   static __maybe_unused int
>>> +#ifdef VDSO_GETS_VD_PTR_FROM_ARCH
>>> +__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t
>>> clock,
>>> +              struct __kernel_timespec *ts)
>>> +{
>>> +#else
>>>   __cvdso_clock_gettime_common(clockid_t clock, struct
>>> __kernel_timespec *ts)
>>>   {
>>>       const struct vdso_data *vd = __arch_get_vdso_data();
>>> +#endif
>>>       u32 msk;
>>
>> If we do that, then there is no point in propagating this to the inner
>> functions. It's perfectly fine to have this distinction at the outermost
>> level.
>
> In v2, I did it at the arch level (see
> https://patchwork.ozlabs.org/patch/1214983/). Andy was concerned about
> it being suboptimal for arches which (unlike powerpc) have PC related
> data addressing mode.
>
> Wouldn't it be the same issue if doing it at the outermost level of
> generic VDSO ?

Any opinion on this ?

From your point of view, what should I do:
A/ __arch_get_vdso_data() handled entirely at arch level and arches
handing over the vdso data pointer to generic C VDSO functions all the
time (as in my v2 series) ?
B/ Data pointer being handed over all the way up for arches wanting to
do so, no changes at all for others (as in my v3 series) ?
C/ __arch_get_vdso_data() being called at the outermost generic level
for arches not interested in handling data pointer from the caller (as
suggested by Thomas) ?

Andy, with A/ you were concerned about arches being able to do PC-relative
accesses. Would it be an issue for C/ as well? If not, I guess
C/ would be cleaner than B/, although not as clean as A/, which doesn't
add any #ifdefery at all.

Thanks
Christophe

2020-01-16 10:38:30

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [RFC PATCH v3 08/12] lib: vdso: allow arches to provide vdso data pointer

Christophe Leroy <[email protected]> writes:
> Le 15/01/2020 à 07:15, Christophe Leroy a écrit :
> From your point of view, what should I do:
> A/ __arch_get_vdso_data() handled entirely at arch level and arches
> handing over the vdso data pointer to generic C VDSO functions all the
> time (as in my v2 series) ?

No. That's again moving the same code to all architectures.

> B/ Data pointer being handed over all the way up for arches wanting to
> do so, no changes at all for others (as in my v3 series) ?

Too much ifdeffery

> C/ __arch_get_vdso_data() being called at the outermost generic level
> for arches not interested in handling data pointer from the caller (as
> suggested by Thomas) ?
>
> Andy, with A/ you were concerned about arches being able to do PC-relative
> accesses. Would it be an issue for C/ as well? If not, I guess
> C/ would be cleaner than B/, although not as clean as A/, which doesn't
> add any #ifdefery at all.

You can avoid ifdeffery with C if you do:

static __maybe_unused int
__cvdso_data_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
const struct vdso_data *vd)
{
.....
}

static __maybe_unused int
__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
const struct vdso_data *vd = __arch_get_vdso_data();

return __cvdso_data_clock_gettime(clock, ts, vd);
}

and then use __cvdso_data_clock_gettime on PPC and let the other archs
unmodified.

Thanks,

tglx

2020-01-16 23:25:27

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC PATCH v3 08/12] lib: vdso: allow arches to provide vdso data pointer

On Thu, Jan 16, 2020 at 2:35 AM Thomas Gleixner <[email protected]> wrote:
>
> static __maybe_unused int
> __cvdso_data_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
> const struct vdso_data *vd)
> {
> .....
> }
>
> static __maybe_unused int
> __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
> {
> const struct vdso_data *vd = __arch_get_vdso_data();
>
> return __cvdso_data_clock_gettime(clock, ts, vd);
> }
>
> and then use __cvdso_data_clock_gettime on PPC and let the other archs
> unmodified.
>
>

FWIW, I did some experiments on x86 with gcc 9.2. gcc 9.2 uses
rip-relative accesses if I simplify the config enough and otherwise
materializes the pointer. Presumably it decides that the code size
reduction is worth it if there are a lot of accesses.

I suspect that tglx's suggestion will be fine or at worst will add
negligible overhead on x86_64.