2018-03-11 19:25:12

by Jason Vas Dias

[permalink] [raw]
Subject: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW


Currently the VDSO does not handle
clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
on Intel / AMD - it calls
vdso_fallback_gettime()
for this clock, which issues a syscall, having an unacceptably high
latency (minimum measurable time or time between measurements)
of 300-700ns on two 2.8-3.9GHz Haswell x86_64 (Family_Model 06_3C)
machines under various versions of Linux.

Sometimes, particularly when correlating elapsed time to performance
counter values, code needs to know elapsed time from the perspective
of the CPU no matter how "hot" / fast or "cold" / slow it might be
running wrt NTP / PTP ; when code needs this, the latencies with
a syscall are often unacceptably high.

I reported this as Bug #198961 :
'https://bugzilla.kernel.org/show_bug.cgi?id=198961'
and in previous posts with subjects matching 'CLOCK_MONOTONIC_RAW' .

This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO ,
by exporting the raw clock calibration, last cycles, last xtime_nsec,
and last raw_sec value in the vsyscall_gtod_data during update_vsyscall() .

Now the new do_monotonic_raw() function in the vDSO has a latency of about 24ns
on average, and the test program:
tools/testing/selftests/timers/inconsistency-check.c
succeeds with arguments: '-c 4 -t 120' or any arbitrary -t value.

The patch is against Linus' latest 4.16-rc4 tree,
current HEAD of :
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
.

The patch affects only files:

arch/x86/include/asm/vgtod.h
arch/x86/entry/vdso/vclock_gettime.c
arch/x86/entry/vsyscall/vsyscall_gtod.c


This is a resend of the original patch, fixing indentation issues
after installing the Emacs Lisp cc-mode hooks from
Documentation/process/coding-style.rst
and calling 'indent-region' and 'tabify' (whitespace-only changes) - SORRY !

Best Regards,
Jason Vas Dias .

---
diff -up linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c
--- linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c.4.16-rc4 2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vdso/vclock_gettime.c 2018-03-11 19:00:04.630019100 +0000
@@ -182,6 +182,29 @@ notrace static u64 vread_tsc(void)
return last;
}

+notrace static u64 vread_tsc_raw(void)
+{
+ u64 tsc, last=gtod->raw_cycle_last;
+ if( likely( gtod->has_rdtscp ) ) {
+ u32 tsc_lo, tsc_hi,
+ tsc_cpu __attribute__((unused));
+ asm volatile
+ ( "rdtscp"
+ /* ^- has built-in cancellation point / pipeline stall"barrier" */
+ : "=a" (tsc_lo)
+ , "=d" (tsc_hi)
+ , "=c" (tsc_cpu)
+ ); // since all variables 32-bit, eax, edx, ecx used - NOT rax, rdx, rcx
+ tsc = ((((u64)tsc_hi) & 0xffffffffUL) << 32) | (((u64)tsc_lo) & 0xffffffffUL);
+ } else {
+ tsc = rdtsc_ordered();
+ }
+ if (likely(tsc >= last))
+ return tsc;
+ asm volatile ("");
+ return last;
+}
+
notrace static inline u64 vgetsns(int *mode)
{
u64 v;
@@ -203,6 +226,27 @@ notrace static inline u64 vgetsns(int *m
return v * gtod->mult;
}

+notrace static inline u64 vgetsns_raw(int *mode)
+{
+ u64 v;
+ cycles_t cycles;
+
+ if (gtod->vclock_mode == VCLOCK_TSC)
+ cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+ else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+ cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+ else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+ cycles = vread_hvclock(mode);
+#endif
+ else
+ return 0;
+ v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+ return v * gtod->raw_mult;
+}
+
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
@@ -246,6 +290,27 @@ notrace static int __always_inline do_mo
return mode;
}

+notrace static int __always_inline do_monotonic_raw( struct timespec *ts)
+{
+ unsigned long seq;
+ u64 ns;
+ int mode;
+
+ do {
+ seq = gtod_read_begin(gtod);
+ mode = gtod->vclock_mode;
+ ts->tv_sec = gtod->monotonic_time_raw_sec;
+ ns = gtod->monotonic_time_raw_nsec;
+ ns += vgetsns_raw(&mode);
+ ns >>= gtod->raw_shift;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return mode;
+}
+
notrace static void do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
@@ -277,6 +342,10 @@ notrace int __vdso_clock_gettime(clockid
if (do_monotonic(ts) == VCLOCK_NONE)
goto fallback;
break;
+ case CLOCK_MONOTONIC_RAW:
+ if (do_monotonic_raw(ts) == VCLOCK_NONE)
+ goto fallback;
+ break;
case CLOCK_REALTIME_COARSE:
do_realtime_coarse(ts);
break;
diff -up linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c
--- linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c.4.16-rc4 2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/entry/vsyscall/vsyscall_gtod.c 2018-03-11 19:06:36.584178867 +0000
@@ -16,6 +16,7 @@
#include <linux/timekeeper_internal.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
+#include <asm/cpufeature.h>

int vclocks_used __read_mostly;

@@ -45,6 +46,12 @@ void update_vsyscall(struct timekeeper *
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;

+ vdata->raw_cycle_last = tk->tkr_raw.cycle_last;
+ vdata->raw_mask = tk->tkr_raw.mask;
+ vdata->raw_mult = tk->tkr_raw.mult;
+ vdata->raw_shift = tk->tkr_raw.shift;
+ vdata->has_rdtscp = static_cpu_has(X86_FEATURE_RDTSCP);
+
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;

@@ -74,5 +81,8 @@ void update_vsyscall(struct timekeeper *
vdata->monotonic_time_coarse_sec++;
}

+ vdata->monotonic_time_raw_sec = tk->raw_sec;
+ vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
gtod_write_end(vdata);
}
diff -up linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 linux-4.16-rc4/arch/x86/include/asm/vgtod.h
--- linux-4.16-rc4/arch/x86/include/asm/vgtod.h.4.16-rc4 2018-03-04 22:54:11.000000000 +0000
+++ linux-4.16-rc4/arch/x86/include/asm/vgtod.h 2018-03-11 18:58:36.670840250 +0000
@@ -22,6 +22,11 @@ struct vsyscall_gtod_data {
u64 mask;
u32 mult;
u32 shift;
+ u64 raw_cycle_last;
+ u64 raw_mask;
+ u32 raw_mult;
+ u32 raw_shift;
+ u32 has_rdtscp;

/* open coded 'struct timespec' */
u64 wall_time_snsec;
@@ -32,6 +37,8 @@ struct vsyscall_gtod_data {
gtod_long_t wall_time_coarse_nsec;
gtod_long_t monotonic_time_coarse_sec;
gtod_long_t monotonic_time_coarse_nsec;
+ gtod_long_t monotonic_time_raw_sec;
+ gtod_long_t monotonic_time_raw_nsec;

int tz_minuteswest;
int tz_dsttime;
---


2018-03-12 06:38:48

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Hi Jason,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]

url: https://github.com/0day-ci/linux/commits/Jason-Vas-Dias/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180312-141707
config: i386-randconfig-x006-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386

All errors (new ones prefixed by >>):

>> arch/x86/entry/vdso/vdso32.so.dbg: undefined symbols found

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (709.00 B)
.config.gz (27.69 kB)
Download all attachments

2018-03-12 06:42:29

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc4 1/1] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Hi Jason,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]

url: https://github.com/0day-ci/linux/commits/Jason-Vas-Dias/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180312-141707
config: x86_64-randconfig-x002-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

arch/x86/entry/vdso/vclock_gettime.o: In function `__vdso_clock_gettime':
>> arch/x86/entry/vdso/vclock_gettime.c:336: undefined reference to `__x86_indirect_thunk_rax'
/usr/bin/ld: arch/x86/entry/vdso/vclock_gettime.o: relocation R_X86_64_PC32 against undefined symbol `__x86_indirect_thunk_rax' can not be used when making a shared object; recompile with -fPIC
/usr/bin/ld: final link failed: Bad value
collect2: error: ld returned 1 exit status

vim +336 arch/x86/entry/vdso/vclock_gettime.c

da15cfda arch/x86/vdso/vclock_gettime.c John Stultz 2009-08-19 333
23adec55 arch/x86/vdso/vclock_gettime.c Steven Rostedt 2008-05-12 334 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 335 {
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 @336 switch (clock) {
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 337 case CLOCK_REALTIME:
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 338 if (do_realtime(ts) == VCLOCK_NONE)
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 339 goto fallback;
da15cfda arch/x86/vdso/vclock_gettime.c John Stultz 2009-08-19 340 break;
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 341 case CLOCK_MONOTONIC:
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 342 if (do_monotonic(ts) == VCLOCK_NONE)
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 343 goto fallback;
da15cfda arch/x86/vdso/vclock_gettime.c John Stultz 2009-08-19 344 break;
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias 2018-03-11 345 case CLOCK_MONOTONIC_RAW:
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias 2018-03-11 346 if (do_monotonic_raw(ts) == VCLOCK_NONE)
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias 2018-03-11 347 goto fallback;
ff72916d arch/x86/entry/vdso/vclock_gettime.c Jason Vas Dias 2018-03-11 348 break;
da15cfda arch/x86/vdso/vclock_gettime.c John Stultz 2009-08-19 349 case CLOCK_REALTIME_COARSE:
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 350 do_realtime_coarse(ts);
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 351 break;
da15cfda arch/x86/vdso/vclock_gettime.c John Stultz 2009-08-19 352 case CLOCK_MONOTONIC_COARSE:
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 353 do_monotonic_coarse(ts);
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 354 break;
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 355 default:
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 356 goto fallback;
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 357 }
0d7b8547 arch/x86/vdso/vclock_gettime.c Andy Lutomirski 2011-06-05 358
a939e817 arch/x86/vdso/vclock_gettime.c John Stultz 2012-03-01 359 return 0;
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 360 fallback:
ce39c640 arch/x86/vdso/vclock_gettime.c Stefani Seibold 2014-03-17 361 return vdso_fallback_gettime(clock, ts);
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 362 }
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 363 int clock_gettime(clockid_t, struct timespec *)
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 364 __attribute__((weak, alias("__vdso_clock_gettime")));
2aae950b arch/x86_64/vdso/vclock_gettime.c Andi Kleen 2007-07-21 365

:::::: The code at line 336 was first introduced by commit
:::::: 2aae950b21e4bc789d1fc6668faf67e8748300b7 x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu

:::::: TO: Andi Kleen <[email protected]>
:::::: CC: Linus Torvalds <[email protected]>

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (4.67 kB)
.config.gz (29.40 kB)
Download all attachments