From: stefani@seibold.net
To: gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org, x86@kernel.org,
	tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, ak@linux.intel.com,
	aarcange@redhat.com, john.stultz@linaro.org, luto@amacapital.net,
	xemul@parallels.com, gorcunov@openvz.org,
	andriy.shevchenko@linux.intel.com
Cc: Martin.Runge@rohde-schwarz.com, Andreas.Brief@rohde-schwarz.com,
	Stefani Seibold
Subject: [PATCH v12 9/9] Add 32 bit VDSO time support for 64 bit kernel
Date: Wed, 5 Feb 2014 09:20:04 +0100
Message-Id: <1391588404-28147-10-git-send-email-stefani@seibold.net>
X-Mailer: git-send-email 1.8.5.3
In-Reply-To: <1391588404-28147-1-git-send-email-stefani@seibold.net>
References: <1391588404-28147-1-git-send-email-stefani@seibold.net>
X-Mailing-List: linux-kernel@vger.kernel.org

From: Stefani Seibold

This patch adds VDSO time support for the IA32 emulation layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those seen by a 32-bit compiler,
some type hacking is necessary.

The vsyscall_gtod_data structure had to be rearranged a little to serve
both 32-bit and 64-bit code access:

- The seqcount_t was replaced by an unsigned, which makes
  vsyscall_gtod_data independent of the kernel configuration and of
  internal kernel functions.
- The structure is now packed, so it can be accessed by 32-bit and
  64-bit code at the same time.
- The inner struct clock was removed, to make packing of the whole
  struct easier.

The "unsigned seq" is handled by functions derived from seqcount_t.
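As an illustration of that protocol, here is a minimal user-space sketch
of the open-coded seqcount the patch introduces (the gtod_write_begin/end
and gtod_read_begin/retry names are taken from the patch; the GCC
__atomic_thread_fence() calls and the main() harness are stand-ins for
the kernel's smp_wmb()/smp_rmb() and for the real timekeeper/vDSO
callers, not part of the patch itself):

#include <stdio.h>

static unsigned seq;		/* even: stable, odd: write in progress */
static unsigned long data;	/* stand-in for the gtod payload */

static void gtod_write_begin(unsigned *s)
{
	++*s;					/* seq becomes odd */
	__atomic_thread_fence(__ATOMIC_RELEASE);	/* kernel: smp_wmb() */
}

static void gtod_write_end(unsigned *s)
{
	__atomic_thread_fence(__ATOMIC_RELEASE);	/* kernel: smp_wmb() */
	++*s;					/* seq becomes even again */
}

static unsigned gtod_read_begin(const unsigned *s)
{
	unsigned ret;

	/* spin while a writer holds the (odd) sequence count */
	while ((ret = *(volatile const unsigned *)s) & 1)
		;				/* kernel: cpu_relax() */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* kernel: smp_rmb() */
	return ret;
}

static int gtod_read_retry(const unsigned *s, unsigned start)
{
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* kernel: smp_rmb() */
	return *(volatile const unsigned *)s != start;
}

int main(void)
{
	unsigned long snapshot;
	unsigned start;

	/* writer side */
	gtod_write_begin(&seq);
	data = 42;
	gtod_write_end(&seq);

	/* reader side: retry until an unchanged even seq brackets the copy */
	do {
		start = gtod_read_begin(&seq);
		snapshot = data;
	} while (gtod_read_retry(&seq, start));

	printf("snapshot=%lu\n", snapshot);
	return 0;
}

A reader that observes the same even sequence value before and after
copying the data is guaranteed a consistent snapshot; an odd value means
an update is in flight.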
Signed-off-by: Stefani Seibold
---
 arch/x86/include/asm/vgtod.h          |  20 +++---
 arch/x86/kernel/vsyscall_gtod.c       |  26 +++++--
 arch/x86/vdso/vclock_gettime.c        | 129 ++++++++++++++++++++++++----------
 arch/x86/vdso/vdso32/vclock_gettime.c |  11 +++
 include/uapi/linux/time.h             |   2 +-
 5 files changed, 132 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..2567b02 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -4,16 +4,18 @@
 #include
 #include

-struct vsyscall_gtod_data {
-	seqcount_t seq;
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so the structure must be packed
+ */
+struct __attribute__((packed)) vsyscall_gtod_data {
+	unsigned seq;

-	struct { /* extract of a clocksource struct */
-		int vclock_mode;
-		cycle_t cycle_last;
-		cycle_t mask;
-		u32 mult;
-		u32 shift;
-	} clock;
+	int vclock_mode;
+	cycle_t cycle_last;
+	cycle_t mask;
+	u32 mult;
+	u32 shift;

 	/* open coded 'struct timespec' */
 	time_t wall_time_sec;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index 91862a4..ca48248 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -16,6 +16,18 @@

 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);

+static inline void gtod_write_begin(unsigned *s)
+{
+	++*s;
+	smp_wmb();
+}
+
+static inline void gtod_write_end(unsigned *s)
+{
+	smp_wmb();
+	++*s;
+}
+
 void update_vsyscall_tz(void)
 {
 	vsyscall_gtod_data.sys_tz = sys_tz;
@@ -25,14 +37,14 @@ void update_vsyscall(struct timekeeper *tk)
 {
 	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;

-	write_seqcount_begin(&vdata->seq);
+	gtod_write_begin(&vdata->seq);

 	/* copy vsyscall data */
-	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
-	vdata->clock.cycle_last		= tk->clock->cycle_last;
-	vdata->clock.mask		= tk->clock->mask;
-	vdata->clock.mult		= tk->mult;
-	vdata->clock.shift		= tk->shift;
+	vdata->vclock_mode	= tk->clock->archdata.vclock_mode;
+	vdata->cycle_last	= tk->clock->cycle_last;
+	vdata->mask		= tk->clock->mask;
+	vdata->mult		= tk->mult;
+	vdata->shift		= tk->shift;

 	vdata->wall_time_sec		= tk->xtime_sec;
 	vdata->wall_time_snsec		= tk->xtime_nsec;
@@ -55,6 +67,6 @@ void update_vsyscall(struct timekeeper *tk)
 	vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
 						    tk->wall_to_monotonic);

-	write_seqcount_end(&vdata->seq);
+	gtod_write_end(&vdata->seq);
 }
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 469e57b..b045aaa 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -31,12 +31,24 @@

 #ifndef BUILD_VDSO32

+struct api_timeval {
+	long tv_sec;	/* seconds */
+	long tv_usec;	/* microseconds */
+};
+
+struct api_timespec {
+	long tv_sec;	/* seconds */
+	long tv_nsec;	/* nanoseconds */
+};
+
+typedef long api_time_t;
+
 static notrace cycle_t vread_hpet(void)
 {
 	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
 }

-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct api_timespec *ts)
 {
 	long ret;
 	asm("syscall" : "=a" (ret) :
@@ -44,7 +56,8 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	return ret;
 }

-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+notrace static long vdso_fallback_gtod(struct api_timeval *tv,
+					struct timezone *tz)
 {
 	long ret;

@@ -57,6 +70,18 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 u8 hpet_page __attribute__((visibility("hidden")));

+struct api_timeval {
+	s32 tv_sec;	/* seconds */
+	s32 tv_usec;	/* microseconds */
+};
+
+struct api_timespec {
+	s32 tv_sec;	/* seconds */
+	s32 tv_nsec;	/* nanoseconds */
+};
+
+typedef s32 api_time_t;
+
 #ifdef CONFIG_HPET_TIMER
 static notrace cycle_t vread_hpet(void)
 {
@@ -64,7 +89,7 @@ static notrace cycle_t vread_hpet(void)
 }
 #endif

-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct api_timespec *ts)
 {
 	long ret;

@@ -74,12 +99,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 		"call VDSO32_vsyscall \n"
 		"pop %%ebx \n"
 		: "=a" (ret)
-		: "0" (__NR_clock_gettime), "d" (clock), "c" (ts)
+		: "0" (__NR_ia32_clock_gettime), "d" (clock), "c" (ts)
 		: "memory");
 	return ret;
 }

-notrace static long vdso_fallback_gtod(struct timeval *tv,
+notrace static long vdso_fallback_gtod(struct api_timeval *tv,
 					struct timezone *tz)
 {
 	long ret;

@@ -90,7 +115,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv,
 		"call VDSO32_vsyscall \n"
 		"pop %%ebx \n"
 		: "=a" (ret)
-		: "0" (__NR_gettimeofday), "d" (tv), "c" (tz)
+		: "0" (__NR_ia32_gettimeofday), "d" (tv), "c" (tz)
 		: "memory");
 	return ret;
 }
@@ -157,7 +182,7 @@ static notrace cycle_t vread_pvclock(int *mode)
 		*mode = VCLOCK_NONE;

 	/* refer to tsc.c read_tsc() comment for rationale */
-	last = gtod->clock.cycle_last;
+	last = gtod->cycle_last;

 	if (likely(ret >= last))
 		return ret;
@@ -181,7 +206,7 @@ notrace static cycle_t vread_tsc(void)
 	rdtsc_barrier();
 	ret = (cycle_t)vget_cycles();

-	last = gtod->clock.cycle_last;
+	last = gtod->cycle_last;

 	if (likely(ret >= last))
 		return ret;
@@ -202,20 +227,40 @@ notrace static inline u64 vgetsns(int *mode)
 {
 	u64 v;
 	cycles_t cycles;
-	if (gtod->clock.vclock_mode == VCLOCK_TSC)
+	if (gtod->vclock_mode == VCLOCK_TSC)
 		cycles = vread_tsc();
 #ifdef CONFIG_HPET_TIMER
-	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
+	else if (gtod->vclock_mode == VCLOCK_HPET)
 		cycles = vread_hpet();
 #endif
 #ifdef CONFIG_PARAVIRT_CLOCK
-	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
 		cycles = vread_pvclock(mode);
 #endif
 	else
 		return 0;
-	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-	return v * gtod->clock.mult;
+	v = (cycles - gtod->cycle_last) & gtod->mask;
+	return v * gtod->mult;
+}
+
+notrace static unsigned gtod_read_begin(const unsigned *s)
+{
+	unsigned ret;
+
+repeat:
+	ret = ACCESS_ONCE(*s);
+	if (unlikely(ret & 1)) {
+		cpu_relax();
+		goto repeat;
+	}
+	smp_rmb();
+	return ret;
+}
+
+notrace static int gtod_read_retry(const unsigned *s, unsigned start)
+{
+	smp_rmb();
+	return unlikely(*s != start);
 }

 /* Code size doesn't matter (vdso is 4k anyway) and this is faster.
 */
@@ -227,13 +272,13 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 	ts->tv_nsec = 0;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
-		mode = gtod->clock.vclock_mode;
+		seq = gtod_read_begin(&gtod->seq);
+		mode = gtod->vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->wall_time_snsec;
 		ns += vgetsns(&mode);
-		ns >>= gtod->clock.shift;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+		ns >>= gtod->shift;
+	} while (unlikely(gtod_read_retry(&gtod->seq, seq)));

 	timespec_add_ns(ts, ns);
 	return mode;
@@ -247,13 +292,13 @@ notrace static int do_monotonic(struct timespec *ts)
 	ts->tv_nsec = 0;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
-		mode = gtod->clock.vclock_mode;
+		seq = gtod_read_begin(&gtod->seq);
+		mode = gtod->vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
 		ns = gtod->monotonic_time_snsec;
 		ns += vgetsns(&mode);
-		ns >>= gtod->clock.shift;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+		ns >>= gtod->shift;
+	} while (unlikely(gtod_read_retry(&gtod->seq, seq)));

 	timespec_add_ns(ts, ns);
 	return mode;
@@ -263,58 +308,64 @@ notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
+		seq = gtod_read_begin(&gtod->seq);
 		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
 		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+	} while (unlikely(gtod_read_retry(&gtod->seq, seq)));
 }

 notrace static void do_monotonic_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
+		seq = gtod_read_begin(&gtod->seq);
 		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
 		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+	} while (unlikely(gtod_read_retry(&gtod->seq, seq)));
 }

-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct api_timespec *ts)
 {
+	struct timespec tmp;
+
 	switch (clock) {
 	case CLOCK_REALTIME:
-		if (do_realtime(ts) == VCLOCK_NONE)
+		if (do_realtime(&tmp) == VCLOCK_NONE)
 			goto fallback;
 		break;
 	case CLOCK_MONOTONIC:
-		if (do_monotonic(ts) == VCLOCK_NONE)
+		if (do_monotonic(&tmp) == VCLOCK_NONE)
 			goto fallback;
 		break;
 	case CLOCK_REALTIME_COARSE:
-		do_realtime_coarse(ts);
+		do_realtime_coarse(&tmp);
 		break;
 	case CLOCK_MONOTONIC_COARSE:
-		do_monotonic_coarse(ts);
+		do_monotonic_coarse(&tmp);
 		break;
 	default:
 		goto fallback;
 	}

+	ts->tv_sec = tmp.tv_sec;
+	ts->tv_nsec = tmp.tv_nsec;
+
 	return 0;
 fallback:
 	return vdso_fallback_gettime(clock, ts);
 }
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct api_timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));

-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct api_timeval *tv, struct timezone *tz)
 {
+	struct timespec tmp;
+
 	if (likely(tv != NULL)) {
-		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
-			     offsetof(struct timespec, tv_nsec) ||
-			     sizeof(*tv) != sizeof(struct timespec));
 		if (unlikely(do_realtime(&tmp) == VCLOCK_NONE))
 			return vdso_fallback_gtod(tv, tz);
+		tv->tv_sec = tmp.tv_sec;
+		tv->tv_usec = tmp.tv_nsec;
 		tv->tv_usec /= 1000;
 	}
 	if (unlikely(tz != NULL)) {
@@ -325,21 +376,21 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 	return 0;
 }
-int gettimeofday(struct timeval *, struct timezone *)
+int gettimeofday(struct api_timeval *, struct timezone *)
 	__attribute__((weak, alias("__vdso_gettimeofday")));

 /*
  * This will break when the xtime seconds get inaccurate, but that is
  * unlikely
  */
-notrace time_t __vdso_time(time_t *t)
+notrace api_time_t __vdso_time(api_time_t *t)
 {
 	/* This is atomic on x86 so we don't need any locks. */
-	time_t result = ACCESS_ONCE(gtod->wall_time_sec);
+	api_time_t result = ACCESS_ONCE(gtod->wall_time_sec);

 	if (t)
 		*t = result;
 	return result;
 }
-int time(time_t *t)
+int time(api_time_t *t)
 	__attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c
index fab4ec6..b8a3b22 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -2,6 +2,12 @@

 #ifdef CONFIG_X86_64

+typedef signed long long __kernel_long_t;
+typedef unsigned long long __kernel_ulong_t;
+#define __kernel_long_t __kernel_long_t
+
+#include
+
 #define _ASM_X86_PAGE_H

 #define __pa(x) 0
@@ -10,6 +16,11 @@
 #undef CONFIG_ILLEGAL_POINTER_VALUE
 #define CONFIG_ILLEGAL_POINTER_VALUE 0

+#else
+
+#define __NR_ia32_clock_gettime __NR_clock_gettime
+#define __NR_ia32_gettimeofday __NR_gettimeofday
+
 #endif

 #include "../vclock_gettime.c"
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index e75e1b6..ebf3734 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -8,7 +8,7 @@
 #define _STRUCT_TIMESPEC
 struct timespec {
 	__kernel_time_t	tv_sec;		/* seconds */
-	long		tv_nsec;	/* nanoseconds */
+	__kernel_long_t	tv_nsec;	/* nanoseconds */
 };
 #endif
--
1.8.5.3