2018-03-15 16:08:29

by Jason Vas Dias

[permalink] [raw]
Subject: [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW


Resent to address reviewer comments.

Currently, the VDSO does not handle
clock_gettime( CLOCK_MONOTONIC_RAW, &ts )
on Intel / AMD - it calls
vdso_fallback_gettime()
for this clock, which issues a syscall, having an unacceptably high
latency (minimum measurable time or time between measurements)
of 300-700ns on 2 2.8-3.9ghz Haswell x86_64 Family'_'Model : 06_3C
machines under various versions of Linux.

Sometimes, particularly when correlating elapsed time to performance
counter values, user-space code needs to know elapsed time from the
perspective of the CPU no matter how "hot" / fast or "cold" / slow it
might be running wrt NTP / PTP "real" time; when code needs this,
the latencies associated with a syscall are often unacceptably high.

I reported this as Bug #198161 :
'https://bugzilla.kernel.org/show_bug.cgi?id=198961'
and in previous posts with subjects matching 'CLOCK_MONOTONIC_RAW' .

This patch handles CLOCK_MONOTONIC_RAW clock_gettime() in the VDSO ,
by exporting the raw clock calibration, last cycles, last xtime_nsec,
and last raw_sec value in the vsyscall_gtod_data during vsyscall_update() .

Now the new do_monotonic_raw() function in the vDSO has a latency of @ 24ns
on average, and the test program:
tools/testing/selftest/timers/inconsistency-check.c
succeeds with arguments: '-c 4 -t 120' or any arbitrary -t value.

The patch is against Linus' latest 4.16-rc5 tree,
current HEAD of :
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
.

This patch affects only files:

arch/x86/include/asm/vgtod.h
arch/x86/entry/vdso/vclock_gettime.c
arch/x86/entry/vdso/vdso.lds.S
arch/x86/entry/vdso/vdsox32.lds.S
arch/x86/entry/vdso/vdso32/vdso32.lds.S
arch/x86/entry/vsyscall/vsyscall_gtod.c

There are 3 patches in the series :

Patch #1 makes the VDSO handle clock_gettime(CLOCK_MONOTONIC_RAW) with rdtsc_ordered()

Patches #2 & #3 should be considered "optional" :

Patch #2 makes the VDSO handle clock_gettime(CLOCK_MONOTONIC_RAW) with a new rdtscp() function in msr.h

Patch #3 makes the VDSO export TSC calibration data via a new function in the vDSO:
unsigned int __vdso_linux_tsc_calibration ( struct linux_tsc_calibration *tsc_cal )
that user code can optionally call.


Patch #2 makes clock_gettime(CLOCK_MONOTONIC_RAW) calls somewhat faster
than clock_gettime(CLOCK_MONOTONIC) calls.

I think something like Patch #3 is necessary to export TSC calibration data to user-space TSC readers.

It is entirely up to the kernel developers whether they want to include patches
#2 and #3, but I think something like Patch #1 really needs to get into a future
Linux release, as an unecessary latency of 200-1000ns for a timer that can tick
3 times per nanosecond is unacceptable.

Patches for kernels 3.10.0-21 and 4.9.65-rt23 (ARM) are attached to bug #198161.


Thanks & Best Regards,
Jason Vas Dias


2018-03-15 16:05:16

by Jason Vas Dias

[permalink] [raw]
Subject: [PATCH v4.16-rc5 3/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 03f3904..61d9633 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -21,12 +21,15 @@
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
+#include <uapi/asm/vdso_tsc_calibration.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
+extern unsigned int __vdso_tsc_calibration(
+ struct linux_tsc_calibration_s *tsc_cal);

#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
@@ -383,3 +386,25 @@ notrace time_t __vdso_time(time_t *t)
}
time_t time(time_t *t)
__attribute__((weak, alias("__vdso_time")));
+
+notrace unsigned int
+__vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
+{
+ unsigned long seq;
+
+ do {
+ seq = gtod_read_begin(gtod);
+ if ((gtod->vclock_mode == VCLOCK_TSC) &&
+ (tsc_cal != ((void *)0UL))) {
+ tsc_cal->tsc_khz = gtod->tsc_khz;
+ tsc_cal->mult = gtod->raw_mult;
+ tsc_cal->shift = gtod->raw_shift;
+ return 1;
+ }
+ } while (unlikely(gtod_read_retry(gtod, seq)));
+
+ return 0;
+}
+
+unsigned int linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
+ __attribute((weak, alias("__vdso_linux_tsc_calibration")));
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce..e0b5cce 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
__vdso_getcpu;
time;
__vdso_time;
+ linux_tsc_calibration;
+ __vdso_linux_tsc_calibration;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a..17fd07f 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,7 @@ VERSION
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
+ __vdso_linux_tsc_calibration;
};

LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5..7acac71 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
__vdso_gettimeofday;
__vdso_getcpu;
__vdso_time;
+ __vdso_linux_tsc_calibration;
local: *;
};
}
diff --git a/arch/x86/include/uapi/asm/vdso_tsc_calibration.h b/arch/x86/include/uapi/asm/vdso_tsc_calibration.h
new file mode 100644
index 0000000..ce4b5a45
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vdso_tsc_calibration.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_VDSO_TSC_CALIBRATION_H
+#define _ASM_X86_VDSO_TSC_CALIBRATION_H
+/*
+ * Programs that want to use rdtsc / rdtscp instructions
+ * from user-space can make use of the Linux kernel TSC calibration
+ * by calling :
+ * __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *);
+ * ( one has to resolve this symbol as in
+ * tools/testing/selftests/vDSO/parse_vdso.c
+ * )
+ * which fills in a structure
+ * with the following layout :
+ */
+
+/** struct linux_tsc_calibration_s -
+ * mult: amount to multiply 64-bit TSC value by
+ * shift: the right shift to apply to (mult*TSC) yielding nanoseconds
+ * tsc_khz: the calibrated TSC frequency in KHz from which previous
+ * members calculated
+ */
+struct linux_tsc_calibration_s {
+
+ unsigned int mult;
+ unsigned int shift;
+ unsigned int tsc_khz;
+
+};
+
+/* To use:
+ *
+ * static unsigned
+ * (*linux_tsc_cal)(struct linux_tsc_calibration_s *linux_tsc_cal) =
+ * vdso_sym("LINUX_2.6", "__vdso_linux_tsc_calibration");
+ * if(linux_tsc_cal == ((void *)0))
+ * { fprintf(stderr,"the patch providing __vdso_linux_tsc_calibration"
+ * " is not applied to the kernel.\n");
+ * return ERROR;
+ * }
+ * static struct linux_tsc_calibration clock_source={0};
+ * if((clock_source.mult==0) && ! (*linux_tsc_cal)(&clock_source) )
+ * fprintf(stderr,"TSC is not the system clocksource.\n");
+ * unsigned int tsc_lo, tsc_hi, tsc_cpu;
+ * asm volatile
+ * ( "rdtscp" : (=a) tsc_hi, (=d) tsc_lo, (=c) tsc_cpu );
+ * unsigned long tsc = (((unsigned long)tsc_hi) << 32) | tsc_lo;
+ * unsigned long nanoseconds =
+ * (( clock_source . mult ) * tsc ) >> (clock_source . shift);
+ *
+ * nanoseconds is now TSC value converted to nanoseconds,
+ * according to Linux' clocksource calibration values.
+ * Incidentally, 'tsc_cpu' is the number of the CPU the task is running on.
+ *
+ * But better results are obtained by applying this to the difference (delta)
+ * and adding this to some previous timespec value:
+ * static u64 previous_tsc=0, previous_nsec=0, previous_sec=0;
+ * u64 tsc = rdtscp();
+ * u64 delta = tsc - previous_tsc;
+ * u64 nsec = ((delta * clock_source.mult) + previous_nsec )
+ * >> clock_source.shift;
+ * ts->tv_sec = previous_sec + (nsec / NSEC_PER_SEC);
+ * ts->tv_nsec = nsec % NSEC_PER_SEC;
+ * previous_tsc = tsc
+ * previous_sec = ts->tv_sec;
+ * previous_nsec = ts->tv_nsec << clock_source.shift;
+ * return ts;
+ * This is broadly like the approach taken by Linux kernel & in VDSO .
+ *
+ * Or, in user-space, with floating point, one could use the rdtscp value as
+ * number of picoseconds :
+ * u64 ns = lround( ((double)rdtscp())
+ * / (((double)clock_source.tsc_khz) / 1e3)
+ * );
+ * (ie. if tsc_khz is 3000 , there are 3 tsc ticks per nanosecond,
+ * so divide tsc ticks by 3).
+ *
+ * There should actually be very little difference between the two
+ * values obtained (@ 0.02% ) by either method.
+ */
+
+#endif

2018-03-15 16:07:28

by Jason Vas Dias

[permalink] [raw]
Subject: [PATCH v4.16-rc5 1/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d..fbc7371 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -182,6 +182,18 @@ notrace static u64 vread_tsc(void)
return last;
}

+notrace static u64 vread_tsc_raw(void)
+{
+ u64 tsc
+ , last = gtod->raw_cycle_last;
+
+ tsc = rdtsc_ordered();
+ if (likely(tsc >= last))
+ return tsc;
+ asm volatile ("");
+ return last;
+}
+
notrace static inline u64 vgetsns(int *mode)
{
u64 v;
@@ -203,6 +215,27 @@ notrace static inline u64 vgetsns(int *mode)
return v * gtod->mult;
}

+notrace static inline u64 vgetsns_raw(int *mode)
+{
+ u64 v;
+ cycles_t cycles;
+
+ if (gtod->vclock_mode == VCLOCK_TSC)
+ cycles = vread_tsc_raw();
+#ifdef CONFIG_PARAVIRT_CLOCK
+ else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+ cycles = vread_pvclock(mode);
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+ else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+ cycles = vread_hvclock(mode);
+#endif
+ else
+ return 0;
+ v = (cycles - gtod->raw_cycle_last) & gtod->raw_mask;
+ return v * gtod->raw_mult;
+}
+
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
@@ -246,6 +279,27 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
return mode;
}

+notrace static __always_inline int do_monotonic_raw(struct timespec *ts)
+{
+ unsigned long seq;
+ u64 ns;
+ int mode;
+
+ do {
+ seq = gtod_read_begin(gtod);
+ mode = gtod->vclock_mode;
+ ts->tv_sec = gtod->monotonic_time_raw_sec;
+ ns = gtod->monotonic_time_raw_nsec;
+ ns += vgetsns_raw(&mode);
+ ns >>= gtod->raw_shift;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return mode;
+}
+
notrace static void do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
@@ -277,6 +331,10 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
if (do_monotonic(ts) == VCLOCK_NONE)
goto fallback;
break;
+ case CLOCK_MONOTONIC_RAW:
+ if (do_monotonic_raw(ts) == VCLOCK_NONE)
+ goto fallback;
+ break;
case CLOCK_REALTIME_COARSE:
do_realtime_coarse(ts);
break;
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index e1216dd..5af7093 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -45,6 +45,11 @@ void update_vsyscall(struct timekeeper *tk)
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;

+ vdata->raw_cycle_last = tk->tkr_raw.cycle_last;
+ vdata->raw_mask = tk->tkr_raw.mask;
+ vdata->raw_mult = tk->tkr_raw.mult;
+ vdata->raw_shift = tk->tkr_raw.shift;
+
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;

@@ -74,5 +79,8 @@ void update_vsyscall(struct timekeeper *tk)
vdata->monotonic_time_coarse_sec++;
}

+ vdata->monotonic_time_raw_sec = tk->raw_sec;
+ vdata->monotonic_time_raw_nsec = tk->tkr_raw.xtime_nsec;
+
gtod_write_end(vdata);
}
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9..24e4d45 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -22,6 +22,10 @@ struct vsyscall_gtod_data {
u64 mask;
u32 mult;
u32 shift;
+ u64 raw_cycle_last;
+ u64 raw_mask;
+ u32 raw_mult;
+ u32 raw_shift;

/* open coded 'struct timespec' */
u64 wall_time_snsec;
@@ -32,6 +36,8 @@ struct vsyscall_gtod_data {
gtod_long_t wall_time_coarse_nsec;
gtod_long_t monotonic_time_coarse_sec;
gtod_long_t monotonic_time_coarse_nsec;
+ gtod_long_t monotonic_time_raw_sec;
+ gtod_long_t monotonic_time_raw_nsec;

int tz_minuteswest;
int tz_dsttime;

2018-03-15 16:07:43

by Jason Vas Dias

[permalink] [raw]
Subject: [PATCH v4.16-rc5 2/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index fbc7371..2c46675 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -184,10 +184,9 @@ notrace static u64 vread_tsc(void)

notrace static u64 vread_tsc_raw(void)
{
- u64 tsc
+ u64 tsc = (gtod->has_rdtscp ? rdtscp((void *)0) : rdtsc_ordered())
, last = gtod->raw_cycle_last;

- tsc = rdtsc_ordered();
if (likely(tsc >= last))
return tsc;
asm volatile ("");
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 5af7093..0327a95 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,9 @@
#include <linux/timekeeper_internal.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
+#include <asm/cpufeature.h>
+
+extern unsigned int tsc_khz;

int vclocks_used __read_mostly;

@@ -49,6 +52,7 @@ void update_vsyscall(struct timekeeper *tk)
vdata->raw_mask = tk->tkr_raw.mask;
vdata->raw_mult = tk->tkr_raw.mult;
vdata->raw_shift = tk->tkr_raw.shift;
+ vdata->has_rdtscp = static_cpu_has(X86_FEATURE_RDTSCP);

vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 30df295..a5ff704 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -218,6 +218,37 @@ static __always_inline unsigned long long rdtsc_ordered(void)
return rdtsc();
}

+/**
+ * rdtscp() - read the current TSC and (optionally) CPU number, with built-in
+ * cancellation point replacing barrier - only available
+ * if static_cpu_has(X86_FEATURE_RDTSCP) .
+ * returns: The 64-bit Time Stamp Counter (TSC) value.
+ * Optionally, 'cpu_out' can be non-null, and on return it will contain
+ * the number (Intel CPU ID) of the CPU that the task is currently running on.
+ * As does EAX_EDT_RET, this uses the "open-coded asm" style to
+ * force the compiler + assembler to always use (eax, edx, ecx) registers,
+ * NOT whole (rax, rdx, rcx) on x86_64 , because only 32-bit
+ * variables are used - exactly the same code should be generated
+ * for this instruction on 32-bit as on 64-bit when this asm stanza is used.
+ * See: SDM , Vol #2, RDTSCP instruction.
+ */
+static __always_inline u64 rdtscp(u32 *cpu_out)
+{
+ u32 tsc_lo, tsc_hi, tsc_cpu;
+
+ asm volatile
+ ("rdtscp"
+ : "=a" (tsc_lo)
+ , "=d" (tsc_hi)
+ , "=c" (tsc_cpu)
+ ); // : eax, edx, ecx used - NOT rax, rdx, rcx
+ if (unlikely(cpu_out != ((void *)0)))
+ *cpu_out = tsc_cpu;
+ return ((((u64)tsc_hi) << 32) |
+ (((u64)tsc_lo) & 0x0ffffffffULL)
+ );
+}
+
/* Deprecated, keep it for a cycle for easier merging: */
#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 24e4d45..e7e4804 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -26,6 +26,7 @@ struct vsyscall_gtod_data {
u64 raw_mask;
u32 raw_mult;
u32 raw_shift;
+ u32 has_rdtscp;

/* open coded 'struct timespec' */
u64 wall_time_snsec;

2018-03-15 20:18:49

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Jason,

On Thu, 15 Mar 2018, [email protected] wrote:

> Resent to address reviewer comments.

I was being patient so far and tried to guide you through the patch
submission process, but unfortunately this turns out to be just waste of my
time.

You have not addressed any of the comments I made here:

[1] https://lkml.kernel.org/r/[email protected]
[2] https://lkml.kernel.org/r/[email protected]

But still you claim that you addressed reviewer comments.

The delta between the two patch series shows clearly that you ignored both
review mails completely. You merily changed the implementation of a
function, which I not even reviewed, and fiddled in the big fat comment.

Feel free to ignore me, but rest assured that this is the last chance
before you end up in the /dev/null mail filter.

I recommend you to try to fixup only the first patch and once that is good
to go you might try to submit the rest. But please take your time and
address _ALL_ of my comments and follow the documented rules.

If you have questions regarding my comments or the process after working
through them and the Documentation I pointed to, feel free to ask them on
the list and not in private mail. If you think that my comments or
recommendations are wrong, discuss them on list instead of silently
ignoring them.

Yours grumpy

tglx

2018-03-15 21:44:25

by Jason Vas Dias

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Hi Thomas -
RE:
On 15/03/2018, Thomas Gleixner <[email protected]> wrote:
> Jason,
>
> On Thu, 15 Mar 2018, [email protected] wrote:
>
>> Resent to address reviewer comments.
>
> I was being patient so far and tried to guide you through the patch
> submission process, but unfortunately this turns out to be just waste of my
> time.
>
> You have not addressed any of the comments I made here:
>
> [1]
> https://lkml.kernel.org/r/[email protected]
> [2]
> https://lkml.kernel.org/r/[email protected]
>

I'm really sorry about that - I did not see those mails ,
and have searched for them in my inbox -
are you sure they were sent to '[email protected]' ?
That is the only list I am subscribed to .
I clicked on the links , but the 'To:' field is just
'linux-kernel' .

If I had seen those messages before I re-submitted,
those issues would have been fixed.

checkpatch.pl did not report them -
I ran it with all patches and it reported
no errors .

And I did send the test results in a previous mail -

$ gcc -m64 -o timer timer.c

( must be compiled in 64-bit mode).

This is using the new rdtscp() function :
$ ./timer -r 100
...
Total time: 0.000002806S - Average Latency: 0.000000028S N zero
deltas: 0 N inconsistent deltas: 0
Average of 100 average latencies of 100 samples : 0.000000027S

This is using the rdtsc_ordered() function:

$ ./timer -m -r 100
Total time: 0.000005269S - Average Latency: 0.000000052S N zero
deltas: 0 N inconsistent deltas: 0
Average of 100 average latencies of 100 samples : 0.000000047S

timer.c is a very short program that just reads N_SAMPLES (a
compile-time option)
timespecs using either CLOCK_MONOTONIC_RAW (no -m) or CLOCK_MONOTONIC
first parameter to clock_gettime(), then
computes the deltas as long long, then averages them , counting any
zero deltas, or deltas where the previous timespec is somehow
greater than the current timespec, which are reported as
inconsitencies (note 'inconistent deltas:0' and 'zero deltas: 0' in output).

So my initial claim that rdtscp() can be twice as fast as rdtsc_ordered()
was not far-fetched - this is what I am seeing .

I think this is because of the explicit barrier() call in rdtsc_ordered() .
This must be slower than than the internal processor pipeline
"cancellation point" (barrier) used by the rdtscp instruction itself.
This is the only reason for the rdtscp call - plus all modern Intel
& AMD CPUs support it, and it DOES solve the ordering problem,
whereby instructions in one pipeline of a task can get different
rdtsc() results than instructions in another pipeline.

I will document the results better in the ChangeLog , fix all issues
you identified, and resend .

I did not mean to ignore your comments -
those mails are nowhere in my Inbox -
please , confirm the actual email address
they are getting sent to.

Thanks & Regards,
Jason


Attachments:
timer.c (3.52 kB)

2018-03-15 22:44:39

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

On Thu, 15 Mar 2018, Jason Vas Dias wrote:
> On 15/03/2018, Thomas Gleixner <[email protected]> wrote:
> > On Thu, 15 Mar 2018, [email protected] wrote:
> >
> >> Resent to address reviewer comments.
> >
> > I was being patient so far and tried to guide you through the patch
> > submission process, but unfortunately this turns out to be just waste of my
> > time.
> >
> > You have not addressed any of the comments I made here:
> >
> > [1]
> > https://lkml.kernel.org/r/[email protected]
> > [2]
> > https://lkml.kernel.org/r/[email protected]
> >
>
> I'm really sorry about that - I did not see those mails ,
> and have searched for them in my inbox -

That's close to the 'my dog ate the homework' excuse.

Of course they were sent to the list and to you personally as I used
reply-all. From the mail server log:

2018-03-14 15:27:27 1ew7NH-00039q-Hv <= [email protected] [email protected]

2018-03-14 15:27:30 1ew7NH-00039q-Hv => [email protected] R=dnslookup T=remote_smtp H=gmail-smtp-in.l.google.com [2a00:1450:4013:c01::1a] X=TLS1.2:RSA_AES_128_CBC_SHA1:128 DN="C=US,ST=California,L=Mountain View,O=Google Inc,CN=mx.google.com"

2018-03-14 15:27:31 1ew7NH-00039q-Hv => [email protected] R=dnslookup T=remote_smtp H=vger.kernel.org [209.132.180.67]

<SNIP other recipients on CC list>

2018-03-14 15:27:47 1ew7NH-00039q-Hv Completed

If those messages would not have been delivered to
[email protected] they would hardly be on the mailing list
archive, right?

And they both got delivered to your gmail account as well.

> are you sure they were sent to '[email protected]' ?
> That is the only list I am subscribed to .
> I clicked on the links , but the 'To:' field is just
> 'linux-kernel' .

There is no 'To' field on that page.

[prev in list] [next in list] [prev in thread] [next in thread]

List: linux-kernel

List != To.

As any other sane archive it strips the To and Cc fields for obvious
reasons.

> If I had seen those messages before I re-submitted,
> those issues would have been fixed.
>
> checkpatch.pl did not report them -
> I ran it with all patches and it reported
> no errors .

patch 1/3:

ERROR: Missing Signed-off-by: line(s)
total: 1 errors, 0 warnings, 119 lines checked

patch 2/3:

ERROR: Missing Signed-off-by: line(s)
total: 1 errors, 0 warnings, 71 lines checked

patch 3/3:

WARNING: externs should be avoided in .c files
#24: FILE: arch/x86/entry/vdso/vclock_gettime.c:31:
+extern unsigned int __vdso_tsc_calibration(

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#93:
new file mode 100644

ERROR: Missing Signed-off-by: line(s)

total: 1 errors, 2 warnings, 143 lines checked

It reports an error for every single patch of your latest submission.

> And I did send the test results in a previous mail -

In private mail which I ignore if there is no real good reason. And just
for the record. This private mail contains the following headers:

In-Reply-To: <[email protected]>
References: <[email protected]>
<[email protected]>
<[email protected]>
From: Jason Vas Dias <[email protected]>
Date: Wed, 14 Mar 2018 15:08:55 +0000
Message-ID: <CALyZvKwB667X-ADq4PE8p7_oC2-gdJWQcw4ch4NAadmw9ZoSmw@mail.gmail.com>
Subject: Re: [PATCH v4.16-rc5 2/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

So now, if you take the message ID which is in the In-Reply-To: field and
compare it to the message ID which I used for link [2]:

In-Reply-To: <[email protected]>
> > https://lkml.kernel.org/r/[email protected]

you might notice that these are identical. So how did you end up replying
to a mail which you never received?

Nice try. I'm really fed up with this.

Thanks,

tglx



2018-03-16 13:31:33

by Jason Vas Dias

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc5 (3)] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Good day -

RE:
On 15/03/2018, Thomas Gleixner <[email protected]> wrote:
> On Thu, 15 Mar 2018, Jason Vas Dias wrote:
>> On 15/03/2018, Thomas Gleixner <[email protected]> wrote:
>> > On Thu, 15 Mar 2018, [email protected] wrote:
>> >
>> >> Resent to address reviewer comments.
>> >
>> > I was being patient so far and tried to guide you through the patch
>> > submission process, but unfortunately this turns out to be just waste of
>> > my
>> > time.
>> >
>> > You have not addressed any of the comments I made here:
>> >
>> > [1]
>> > https://lkml.kernel.org/r/[email protected]
>> > [2]
>> > https://lkml.kernel.org/r/[email protected]
>> >
>>
>> I'm really sorry about that - I did not see those mails ,
>> and have searched for them in my inbox -
>
> That's close to the 'my dog ate the homework' excuse.
>


Nevertheless, those messages are NOT in my inbox, nor
can I find them on the list - a google search for
'alpine.DEB.2.21.1803141511340.2481' or
'alpine.DEB.2.21.1803141527300.2481' returns
only the last two mails on the subject , where
you included the links to https://lkml.kernel.org.

I don't know what went wrong here, but I did not
receive those mails until you informed me of them
yesterday evening, when I immediately regenerated
the Patch #1 incorporating fixes for your comments,
and sent it with Subject:
'[PATCH v4.16-rc5 1/1] x86/vdso: VDSO should handle\
clock_gettime(CLOCK_MONOTONIC_RAW) without syscall
'
This version re-uses the 'gtod->cycles' value, which as you point
out, is the same as 'tk->tkr_raw.cycle_last' -
so I removed vread_tsc_raw() .


> Of course they were sent to the list and to you personally as I used
> reply-all. From the mail server log:
>
> 2018-03-14 15:27:27 1ew7NH-00039q-Hv <= [email protected]
> [email protected]
>
> 2018-03-14 15:27:30 1ew7NH-00039q-Hv => [email protected] R=dnslookup
> T=remote_smtp H=gmail-smtp-in.l.google.com [2a00:1450:4013:c01::1a]
> X=TLS1.2:RSA_AES_128_CBC_SHA1:128 DN="C=US,ST=California,L=Mountain
> View,O=Google Inc,CN=mx.google.com"
>
> 2018-03-14 15:27:31 1ew7NH-00039q-Hv => [email protected]
> R=dnslookup T=remote_smtp H=vger.kernel.org [209.132.180.67]
>
> <SNIP other recipients on CC list>
>
> 2018-03-14 15:27:47 1ew7NH-00039q-Hv Completed
>
> If those messages would not have been delivered to
> [email protected] they would hardly be on the mailing list
> archive, right?
>

Yes, I cannot explain why I did not receive them .

I guess I should consider gmail an unreliable delivery
method and use the lkml.org web interface to check
for replies - I will do this from now one.

> And they both got delivered to your gmail account as well.
>

No, they are not in my gmail account Inbox or folders.


> ERROR: Missing Signed-off-by: line(s)
> total: 1 errors, 0 warnings, 71 lines checked
>

I do not know how to fix this error - I was hoping
someone on the list might enlighten me.

>
> WARNING: externs should be avoided in .c files
> #24: FILE: arch/x86/entry/vdso/vclock_gettime.c:31:
> +extern unsigned int __vdso_tsc_calibration(
>

I thought that must be a script bug, since no extern
is being declared by that line; it is an external function
declaration, just like the unmodified line that precedes it.


> WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
> #93:
> new file mode 100644
>
> ERROR: Missing Signed-off-by: line(s)
>
> total: 1 errors, 2 warnings, 143 lines checked
>
> It reports an error for every single patch of your latest submission.
>
>> And I did send the test results in a previous mail -
>
> In private mail which I ignore if there is no real good reason. And just
> for the record. This private mail contains the following headers:
>
> In-Reply-To: <[email protected]>
> References: <[email protected]>
> <[email protected]>
> <[email protected]>
> From: Jason Vas Dias <[email protected]>
> Date: Wed, 14 Mar 2018 15:08:55 +0000
> Message-ID:
> <CALyZvKwB667X-ADq4PE8p7_oC2-gdJWQcw4ch4NAadmw9ZoSmw@mail.gmail.com>
> Subject: Re: [PATCH v4.16-rc5 2/3] x86/vdso: on Intel, VDSO should handle
> CLOCK_MONOTONIC_RAW
>
> So now, if you take the message ID which is in the In-Reply-To: field and
> compare it to the message ID which I used for link [2]:
>
> In-Reply-To: <[email protected]>
>> > https://lkml.kernel.org/r/[email protected]
>
> you might notice that these are identical. So how did you end up replying
> to a mail which you never received?
>
> Nice try. I'm really fed up with this.
>

The only thing I'm trying to do is fix the implementation
of clock_gettime(CLOCK_MONOTONIC_RAW) .

I was replying to the message you sent , which contained a
reference header for the messages you sent but which I did
not receive . I am sorry this happened, but it is beyond my
control.

As soon as I can, I am going to rent a host in a data-center
and set up my own email server. If this experience has taught
me anything, it is that gmail is not of sufficient quality for
reliable transmission of email - I am sorry I am stuck with it
for now.

I will check the Web-based LKML archives instead of my
Gmail inbox for your replies in future - sorry !

But what of the patch I sent last night, which does
address all of your concerns and passes all checkpatch.pl tests?

$ scripts/checkpatch.pl
/tmp/vdso_vclock_gettime_CLOCK_MONOTONIC_RAW_4.16-rc5_basic.patch
total: 0 errors, 0 warnings, 132 lines checked

/tmp/vdso_vclock_gettime_CLOCK_MONOTONIC_RAW_4.16-rc5_basic.patch has
no obvious style problems and is ready for submission.

Once this is submitted, maybe we can work on improving the performance of
both clock_gettime implementations in the vDSO by use of rdtscp , and
exporting some clock calibration to user-space.

Thanks & Best Regards,
Jason

2018-03-17 07:08:14

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc5 3/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180316]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/jason-vas-dias-gmail-com/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180317-143702
config: x86_64-randconfig-x010-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

arch/x86/entry/vdso/vclock_gettime.c: In function '__vdso_linux_tsc_calibration':
>> arch/x86/entry/vdso/vclock_gettime.c:399:27: error: 'struct vsyscall_gtod_data' has no member named 'tsc_khz'
tsc_cal->tsc_khz = gtod->tsc_khz;
^~

vim +399 arch/x86/entry/vdso/vclock_gettime.c

324
325 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
326 {
327 switch (clock) {
328 case CLOCK_REALTIME:
329 if (do_realtime(ts) == VCLOCK_NONE)
330 goto fallback;
331 break;
332 case CLOCK_MONOTONIC:
333 if (do_monotonic(ts) == VCLOCK_NONE)
334 goto fallback;
335 break;
336 case CLOCK_MONOTONIC_RAW:
337 if (do_monotonic_raw(ts) == VCLOCK_NONE)
338 goto fallback;
339 break;
340 case CLOCK_REALTIME_COARSE:
341 do_realtime_coarse(ts);
342 break;
343 case CLOCK_MONOTONIC_COARSE:
344 do_monotonic_coarse(ts);
345 break;
346 default:
347 goto fallback;
348 }
349
350 return 0;
351 fallback:
352 return vdso_fallback_gettime(clock, ts);
353 }
354 int clock_gettime(clockid_t, struct timespec *)
355 __attribute__((weak, alias("__vdso_clock_gettime")));
356
357 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
358 {
359 if (likely(tv != NULL)) {
360 if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
361 return vdso_fallback_gtod(tv, tz);
362 tv->tv_usec /= 1000;
363 }
364 if (unlikely(tz != NULL)) {
365 tz->tz_minuteswest = gtod->tz_minuteswest;
366 tz->tz_dsttime = gtod->tz_dsttime;
367 }
368
369 return 0;
370 }
371 int gettimeofday(struct timeval *, struct timezone *)
372 __attribute__((weak, alias("__vdso_gettimeofday")));
373
374 /*
375 * This will break when the xtime seconds get inaccurate, but that is
376 * unlikely
377 */
378 notrace time_t __vdso_time(time_t *t)
379 {
380 /* This is atomic on x86 so we don't need any locks. */
381 time_t result = READ_ONCE(gtod->wall_time_sec);
382
383 if (t)
384 *t = result;
385 return result;
386 }
387 time_t time(time_t *t)
388 __attribute__((weak, alias("__vdso_time")));
389
390 notrace unsigned int
391 __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
392 {
393 unsigned long seq;
394
395 do {
396 seq = gtod_read_begin(gtod);
397 if ((gtod->vclock_mode == VCLOCK_TSC) &&
398 (tsc_cal != ((void *)0UL))) {
> 399 tsc_cal->tsc_khz = gtod->tsc_khz;
400 tsc_cal->mult = gtod->raw_mult;
401 tsc_cal->shift = gtod->raw_shift;
402 return 1;
403 }
404 } while (unlikely(gtod_read_retry(gtod, seq)));
405
406 return 0;
407 }
408

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (3.65 kB)
.config.gz (25.50 kB)
Download all attachments

2018-03-17 07:30:32

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v4.16-rc5 3/3] x86/vdso: on Intel, VDSO should handle CLOCK_MONOTONIC_RAW

Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.16-rc4]
[also build test ERROR on next-20180316]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/jason-vas-dias-gmail-com/x86-vdso-on-Intel-VDSO-should-handle-CLOCK_MONOTONIC_RAW/20180317-143702
config: i386-randconfig-x013-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386

All errors (new ones prefixed by >>):

In file included from arch/x86/entry/vdso/vdso32/vclock_gettime.c:33:0:
arch/x86/entry/vdso/vdso32/../vclock_gettime.c: In function '__vdso_linux_tsc_calibration':
>> arch/x86/entry/vdso/vdso32/../vclock_gettime.c:399:27: error: 'struct vsyscall_gtod_data' has no member named 'tsc_khz'
tsc_cal->tsc_khz = gtod->tsc_khz;
^~

vim +399 arch/x86/entry/vdso/vdso32/../vclock_gettime.c

324
325 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
326 {
327 switch (clock) {
328 case CLOCK_REALTIME:
329 if (do_realtime(ts) == VCLOCK_NONE)
330 goto fallback;
331 break;
332 case CLOCK_MONOTONIC:
333 if (do_monotonic(ts) == VCLOCK_NONE)
334 goto fallback;
335 break;
336 case CLOCK_MONOTONIC_RAW:
337 if (do_monotonic_raw(ts) == VCLOCK_NONE)
338 goto fallback;
339 break;
340 case CLOCK_REALTIME_COARSE:
341 do_realtime_coarse(ts);
342 break;
343 case CLOCK_MONOTONIC_COARSE:
344 do_monotonic_coarse(ts);
345 break;
346 default:
347 goto fallback;
348 }
349
350 return 0;
351 fallback:
352 return vdso_fallback_gettime(clock, ts);
353 }
354 int clock_gettime(clockid_t, struct timespec *)
355 __attribute__((weak, alias("__vdso_clock_gettime")));
356
357 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
358 {
359 if (likely(tv != NULL)) {
360 if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
361 return vdso_fallback_gtod(tv, tz);
362 tv->tv_usec /= 1000;
363 }
364 if (unlikely(tz != NULL)) {
365 tz->tz_minuteswest = gtod->tz_minuteswest;
366 tz->tz_dsttime = gtod->tz_dsttime;
367 }
368
369 return 0;
370 }
371 int gettimeofday(struct timeval *, struct timezone *)
372 __attribute__((weak, alias("__vdso_gettimeofday")));
373
374 /*
375 * This will break when the xtime seconds get inaccurate, but that is
376 * unlikely
377 */
378 notrace time_t __vdso_time(time_t *t)
379 {
380 /* This is atomic on x86 so we don't need any locks. */
381 time_t result = READ_ONCE(gtod->wall_time_sec);
382
383 if (t)
384 *t = result;
385 return result;
386 }
387 time_t time(time_t *t)
388 __attribute__((weak, alias("__vdso_time")));
389
390 notrace unsigned int
391 __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal)
392 {
393 unsigned long seq;
394
395 do {
396 seq = gtod_read_begin(gtod);
397 if ((gtod->vclock_mode == VCLOCK_TSC) &&
398 (tsc_cal != ((void *)0UL))) {
> 399 tsc_cal->tsc_khz = gtod->tsc_khz;
400 tsc_cal->mult = gtod->raw_mult;
401 tsc_cal->shift = gtod->raw_shift;
402 return 1;
403 }
404 } while (unlikely(gtod_read_retry(gtod, seq)));
405
406 return 0;
407 }
408

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (3.75 kB)
.config.gz (36.20 kB)
Download all attachments