On powerpc, VDSO functions and syscalls cannot be implemented in C
because the Linux kernel ABI requires that CR[SO] bit is set in case
of error and cleared when no error.
As this cannot be done in C, C VDSO functions and syscall-based
fallback need a trampoline in ASM.
By moving the fallback calls out of the common code, arches like
powerpc can implement both the call to C VDSO and the fallback call
in a single trampoline function.
The two advantages are:
- No need to play back and forth with CR[SO] and negative return value.
- No stack frame is required in VDSO C functions for the fallbacks.
The performance improvement is significant on powerpc.
Signed-off-by: Christophe Leroy <[email protected]>
---
arch/arm/vdso/vgettimeofday.c | 28 +++++++++++++++---
arch/arm64/kernel/vdso/vgettimeofday.c | 21 ++++++++++++--
arch/arm64/kernel/vdso32/vgettimeofday.c | 35 ++++++++++++++++++++---
arch/mips/vdso/vgettimeofday.c | 49 +++++++++++++++++++++++++++-----
arch/x86/entry/vdso/vclock_gettime.c | 42 +++++++++++++++++++++++----
lib/vdso/gettimeofday.c | 31 ++++----------------
6 files changed, 156 insertions(+), 50 deletions(-)
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index 1976c6f325a4..5451afb715e6 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -10,25 +10,45 @@
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
- return __cvdso_clock_gettime32(clock, ts);
+ int ret = __cvdso_clock_gettime32(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime32_fallback(clock, ts);
}
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
- return __cvdso_clock_gettime(clock, ts);
+ int ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- return __cvdso_gettimeofday(tv, tz);
+ int ret = __cvdso_gettimeofday(tv, tz);
+
+ if (likely(!ret))
+ return ret;
+
+ return gettimeofday_fallback(tv, tz);
}
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
- return __cvdso_clock_getres_time32(clock_id, res);
+ int ret = __cvdso_clock_getres_time32(clock_id, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres32_fallback(clock_id, res);
}
/* Avoid unresolved references emitted by GCC */
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
index 747635501a14..62694876b216 100644
--- a/arch/arm64/kernel/vdso/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -11,17 +11,32 @@
int __kernel_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
{
- return __cvdso_clock_gettime(clock, ts);
+ int ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- return __cvdso_gettimeofday(tv, tz);
+ int ret = __cvdso_gettimeofday(tv, tz);
+
+ if (likely(!ret))
+ return ret;
+
+ return gettimeofday_fallback(tv, tz);
}
int __kernel_clock_getres(clockid_t clock_id,
struct __kernel_timespec *res)
{
- return __cvdso_clock_getres(clock_id, res);
+ int ret = __cvdso_clock_getres(clock_id, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres_fallback(clock_id, res);
}
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 54fc1c2ce93f..6770d2bedd1f 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -11,37 +11,64 @@
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
+ int ret;
+
/* The checks below are required for ABI consistency with arm */
if ((u32)ts >= TASK_SIZE_32)
return -EFAULT;
- return __cvdso_clock_gettime32(clock, ts);
+ ret = __cvdso_clock_gettime32(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime32_fallback(clock, ts);
}
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
+ int ret;
+
/* The checks below are required for ABI consistency with arm */
if ((u32)ts >= TASK_SIZE_32)
return -EFAULT;
- return __cvdso_clock_gettime(clock, ts);
+ ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- return __cvdso_gettimeofday(tv, tz);
+ int ret = __cvdso_gettimeofday(tv, tz);
+
+ if (likely(!ret))
+ return ret;
+
+ return gettimeofday_fallback(tv, tz);
}
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
+ int ret;
+
/* The checks below are required for ABI consistency with arm */
if ((u32)res >= TASK_SIZE_32)
return -EFAULT;
- return __cvdso_clock_getres_time32(clock_id, res);
+ ret = __cvdso_clock_getres_time32(clock_id, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres32_fallback(clock_id, res);
}
/* Avoid unresolved references emitted by GCC */
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
index 6ebdc37c89fc..02758c58454c 100644
--- a/arch/mips/vdso/vgettimeofday.c
+++ b/arch/mips/vdso/vgettimeofday.c
@@ -14,25 +14,45 @@
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
- return __cvdso_clock_gettime32(clock, ts);
+ int ret = __cvdso_clock_gettime32(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime32_fallback(clock, ts);
}
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- return __cvdso_gettimeofday(tv, tz);
+ int ret = __cvdso_gettimeofday(tv, tz);
+
+ if (likely(!ret))
+ return ret;
+
+ return gettimeofday_fallback(tv, tz);
}
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
- return __cvdso_clock_getres_time32(clock_id, res);
+ int ret = __cvdso_clock_getres_time32(clock_id, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres32_fallback(clock_id, res);
}
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
- return __cvdso_clock_gettime(clock, ts);
+ int ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
#else
@@ -40,19 +60,34 @@ int __vdso_clock_gettime64(clockid_t clock,
int __vdso_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
{
- return __cvdso_clock_gettime(clock, ts);
+ int ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
- return __cvdso_gettimeofday(tv, tz);
+ int ret = __cvdso_gettimeofday(tv, tz);
+
+ if (likely(!ret))
+ return ret;
+
+ return gettimeofday_fallback(tv, tz);
}
int __vdso_clock_getres(clockid_t clock_id,
struct __kernel_timespec *res)
{
- return __cvdso_clock_getres(clock_id, res);
+ int ret = __cvdso_clock_getres(clock_id, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres_fallback(clock_id, res);
}
#endif
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 7d70935b6758..fde511cfe284 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -19,7 +19,12 @@ extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t);
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
- return __cvdso_gettimeofday(tv, tz);
+ int ret = __cvdso_gettimeofday(tv, tz);
+
+ if (likely(!ret))
+ return ret;
+
+ return gettimeofday_fallback(tv, tz);
}
int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
@@ -40,7 +45,12 @@ extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- return __cvdso_clock_gettime(clock, ts);
+ int ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
int clock_gettime(clockid_t, struct __kernel_timespec *)
@@ -49,7 +59,12 @@ int clock_gettime(clockid_t, struct __kernel_timespec *)
int __vdso_clock_getres(clockid_t clock,
struct __kernel_timespec *res)
{
- return __cvdso_clock_getres(clock, res);
+ int ret = __cvdso_clock_getres(clock, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres_fallback(clock, res);
}
int clock_getres(clockid_t, struct __kernel_timespec *)
__attribute__((weak, alias("__vdso_clock_getres")));
@@ -61,7 +76,12 @@ extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);
int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
- return __cvdso_clock_gettime32(clock, ts);
+ int ret = __cvdso_clock_gettime32(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime32_fallback(clock, ts);
}
int clock_gettime(clockid_t, struct old_timespec32 *)
@@ -69,7 +89,12 @@ int clock_gettime(clockid_t, struct old_timespec32 *)
int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
- return __cvdso_clock_gettime(clock, ts);
+ int ret = __cvdso_clock_gettime(clock, ts);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_gettime_fallback(clock, ts);
}
int clock_gettime64(clockid_t, struct __kernel_timespec *)
@@ -77,7 +102,12 @@ int clock_gettime64(clockid_t, struct __kernel_timespec *)
int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
{
- return __cvdso_clock_getres_time32(clock, res);
+ int ret = __cvdso_clock_getres_time32(clock, res);
+
+ if (likely(!ret))
+ return ret;
+
+ return clock_getres32_fallback(clock, res);
}
int clock_getres(clockid_t, struct old_timespec32 *)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 59189ed49352..4618e274f1d5 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -16,9 +16,6 @@
* - __arch_get_vdso_data(): to get the vdso datapage.
* - __arch_get_hw_counter(): to get the hw counter based on the
* clock_mode.
- * - gettimeofday_fallback(): fallback for gettimeofday.
- * - clock_gettime_fallback(): fallback for clock_gettime.
- * - clock_getres_fallback(): fallback for clock_getres.
*/
#ifdef ENABLE_COMPAT_VDSO
#include <asm/vdso/compat_gettimeofday.h>
@@ -110,23 +107,14 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
static __maybe_unused int
__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime_common(clock, ts);
-
- if (unlikely(ret))
- return clock_gettime_fallback(clock, ts);
- return 0;
+ return __cvdso_clock_gettime_common(clock, ts);
}
static __maybe_unused int
__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
{
struct __kernel_timespec ts;
- int ret;
-
- ret = __cvdso_clock_gettime_common(clock, &ts);
-
- if (unlikely(ret))
- return clock_gettime32_fallback(clock, res);
+ int ret = __cvdso_clock_gettime_common(clock, &ts);
if (likely(!ret)) {
res->tv_sec = ts.tv_sec;
@@ -144,7 +132,7 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
struct __kernel_timespec ts;
if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
- return gettimeofday_fallback(tv, tz);
+ return -1;
tv->tv_sec = ts.tv_sec;
tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
@@ -218,23 +206,14 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
{
- int ret = __cvdso_clock_getres_common(clock, res);
-
- if (unlikely(ret))
- return clock_getres_fallback(clock, res);
- return 0;
+ return __cvdso_clock_getres_common(clock, res);
}
static __maybe_unused int
__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
{
struct __kernel_timespec ts;
- int ret;
-
- ret = __cvdso_clock_getres_common(clock, &ts);
-
- if (unlikely(ret))
- return clock_getres32_fallback(clock, res);
+ int ret = __cvdso_clock_getres_common(clock, &ts);
if (likely(!ret && res)) {
res->tv_sec = ts.tv_sec;
--
2.13.3
On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
<[email protected]> wrote:
>
> On powerpc, VDSO functions and syscalls cannot be implemented in C
> because the Linux kernel ABI requires that CR[SO] bit is set in case
> of error and cleared when no error.
>
> As this cannot be done in C, C VDSO functions and syscall'based
> fallback need a trampoline in ASM.
>
> By moving the fallback calls out of the common code, arches like
> powerpc can implement both the call to C VDSO and the fallback call
> in a single trampoline function.
Maybe the issue is that I'm not a powerpc person, but I don't
understand this. The common vDSO code is in C. Presumably this means
that you need an asm trampoline no matter what to call the C code. Is
the improvement that, with this change, you can have the asm
trampoline do a single branch, so it's logically:
ret = [call the C code];
if (ret == 0) {
set success bit;
} else {
ret = fallback;
if (ret == 0)
set success bit;
else
set failure bit;
}
return ret;
instead of:
ret = [call the C code, which includes the fallback];
if (ret == 0)
set success bit;
else
set failure bit;
It's not obvious to me that the former ought to be faster.
>
> The two advantages are:
> - No need play back and forth with CR[SO] and negative return value.
> - No stack frame is required in VDSO C functions for the fallbacks.
How is no stack frame required? Do you mean that the presence of the
fallback causes worse code generation? Can you improve the fallback
instead?
Le 24/12/2019 à 03:24, Andy Lutomirski a écrit :
> On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
> <[email protected]> wrote:
>>
>> On powerpc, VDSO functions and syscalls cannot be implemented in C
>> because the Linux kernel ABI requires that CR[SO] bit is set in case
>> of error and cleared when no error.
>>
>> As this cannot be done in C, C VDSO functions and syscall'based
>> fallback need a trampoline in ASM.
>>
>> By moving the fallback calls out of the common code, arches like
>> powerpc can implement both the call to C VDSO and the fallback call
>> in a single trampoline function.
>
> Maybe the issue is that I'm not a powerpc person, but I don't
> understand this. The common vDSO code is in C. Presumably this means
> that you need an asm trampoline no matter what to call the C code. Is
> the improvement that, with this change, you can have the asm
> trampoline do a single branch, so it's logically:
>
> ret = [call the C code];
> if (ret == 0) {
> set success bit;
> } else {
> ret = fallback;
> if (ret == 0)
> set success bit;
> else
> set failure bit;
> }
More simple than above, in fact it is:
ret = [call the C code];
if (ret == 0) {
set success bit;
} else {
ret = fallback [ which sets the success/failure bit];
}
return ret
>
> return ret;
>
> instead of:
>
> ret = [call the C code, which includes the fallback];
C code cannot handle the success/failure bit so we need to do something
which does:
int assembly_to_fallback()
{
ret = [syscall the fallback]
if (success bit set)
return ret;
else
return -ret;
}
Also means going back and forth between the success bit and negative return.
> if (ret == 0)
> set success bit;
> else
> set failure bit;
>
> It's not obvious to me that the former ought to be faster.
>
>>
>> The two advantages are:
>> - No need play back and forth with CR[SO] and negative return value.
>> - No stack frame is required in VDSO C functions for the fallbacks.
>
> How is no stack frame required? Do you mean that the presence of the
> fallback causes worse code generation? Can you improve the fallback
> instead?
>
When function F1 calls function F2 (with BL insn), the link register
(LR) is set with the return address in F1, so that at the end of F2, F2
branches to LR (with BLR insn), that's how you return from functions.
When F2 calls function F3, the same happens, LR is set to the return of
F3 into F2. It means that F2 has to save LR in order to be able to
return to F1, otherwise the return address from F2 into F1 is lost.
But ... thinking about it once more, indeed fallback means doing a
syscall, and in fact I realise that syscalls won't clobber LR, so it
should be possible to do something. Let me try it.
Christophe
> On Dec 24, 2019, at 7:41 PM, christophe leroy <[email protected]> wrote:
>
>
>
>> Le 24/12/2019 à 03:24, Andy Lutomirski a écrit :
>>> On Mon, Dec 23, 2019 at 6:31 AM Christophe Leroy
>>> <[email protected]> wrote:
>>>
>>> On powerpc, VDSO functions and syscalls cannot be implemented in C
>>> because the Linux kernel ABI requires that CR[SO] bit is set in case
>>> of error and cleared when no error.
>>>
>>> As this cannot be done in C, C VDSO functions and syscall'based
>>> fallback need a trampoline in ASM.
>>>
>>> By moving the fallback calls out of the common code, arches like
>>> powerpc can implement both the call to C VDSO and the fallback call
>>> in a single trampoline function.
>> Maybe the issue is that I'm not a powerpc person, but I don't
>> understand this. The common vDSO code is in C. Presumably this means
>> that you need an asm trampoline no matter what to call the C code. Is
>> the improvement that, with this change, you can have the asm
>> trampoline do a single branch, so it's logically:
>> ret = [call the C code];
>> if (ret == 0) {
>> set success bit;
>> } else {
>> ret = fallback;
>> if (ret == 0)
>> set success bit;
>> else
>> set failure bit;
>> }
>
> More simple than above, in fact it is:
>
> ret = [call the C code];
> if (ret == 0) {
> set success bit;
> } else {
> ret = fallback [ which sets the success/failure bit];
> }
> return ret
Cute.
>
>
>> return ret;
>> instead of:
>> ret = [call the C code, which includes the fallback];
>
> C code cannot handle the success/failure bit so we need to do something which does:
>
> int assembly_to_fallback()
> {
> ret = [syscall the fallback]
> if (success bit set)
> return ret;
> else
> return -ret;
> }
Wait, your calling convention has syscalls return positive values on error?
But I think this is moot. The syscalls in question never return nonzero success values, so you should be able to inline the syscall without worrying about this.
>
> Also means going back and forth between the success bit and negative return.
>
>> if (ret == 0)
>> set success bit;
>> else
>> set failure bit;
>> It's not obvious to me that the former ought to be faster.
>>>
>>> The two advantages are:
>>> - No need play back and forth with CR[SO] and negative return value.
>>> - No stack frame is required in VDSO C functions for the fallbacks.
>> How is no stack frame required? Do you mean that the presence of the
>> fallback causes worse code generation? Can you improve the fallback
>> instead?
>
> When function F1 calls function F2 (with BL insn), the link register (LR) is set with the return address in F1, so that at the end of F2, F2 branches to LR (with BLR insn), that's how you return from functions.
>
> When F2 calls function F3, the same happens, LR is set to the return of F3 into F2. It means that F2 has to save LR in order to be able to return to F1, otherwise the return address from F2 into F1 is lost.
>
> But ... thinking about it once more, indeed fallback means doing a syscall, and in fact I realise that syscalls won't clobber LR, so it should be possible to do something. Let me try it.
>
With that plus assume that nonzero return means failure, I think you should have all your bases covered.