2014-10-01 21:24:48

by Henrik Austad

[permalink] [raw]
Subject: Re: [PATCH] tile: add clock_gettime support to vDSO

On Tue, Sep 30, 2014 at 03:29:45PM -0400, Chris Metcalf wrote:
> This change adds support for clock_gettime with CLOCK_REALTIME
> and CLOCK_MONOTONIC using vDSO. In addition, with this change
> we switch to use seqlocks instead of integer counters.
>
> We also support the *_COARSE clockid_t, for apps that want speed
> but aren't concerned about fine-grained timestamps; this saves
> about 20 cycles per call (see http://lwn.net/Articles/342018/).
>
> Signed-off-by: Chris Metcalf <[email protected]>

What rev is this based on? When I apply it to Linus' latest (aad7fb916), it
fails in arch/tile/kernel/time.c and arch/tile/kernel/vdso/vgettimeofday.c
(see below)

> ---
> arch/tile/include/asm/vdso.h | 9 +-
> arch/tile/kernel/time.c | 31 ++++---
> arch/tile/kernel/vdso/vdso.lds.S | 2 +
> arch/tile/kernel/vdso/vgettimeofday.c | 166 +++++++++++++++++++++++++---------
> 4 files changed, 153 insertions(+), 55 deletions(-)
>
> diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
> index 9f6a78d665fa..4527701fcead 100644
> --- a/arch/tile/include/asm/vdso.h
> +++ b/arch/tile/include/asm/vdso.h
> @@ -15,6 +15,7 @@
> #ifndef __TILE_VDSO_H__
> #define __TILE_VDSO_H__
>
> +#include <linux/seqlock.h>
> #include <linux/types.h>
>
> /*
> @@ -26,8 +27,8 @@
> */
>
> struct vdso_data {
> - __u64 tz_update_count; /* Timezone atomicity ctr */
> - __u64 tb_update_count; /* Timebase atomicity ctr */
> + seqcount_t tz_seq; /* Timezone seqlock */
> + seqcount_t tb_seq; /* Timebase seqlock */
> __u64 xtime_tod_stamp; /* TOD clock for xtime */
> __u64 xtime_clock_sec; /* Kernel time second */
> __u64 xtime_clock_nsec; /* Kernel time nanosecond */
> @@ -37,6 +38,10 @@ struct vdso_data {
> __u32 shift; /* Cycle to nanosecond divisor (power of two) */
> __u32 tz_minuteswest; /* Minutes west of Greenwich */
> __u32 tz_dsttime; /* Type of dst correction */
> + __u64 xtime_clock_coarse_sec; /* Coarse kernel time */
> + __u64 xtime_clock_coarse_nsec;
> + __u64 wtom_clock_coarse_sec; /* Coarse wall to monotonic time */
> + __u64 wtom_clock_coarse_nsec;
> };
>
> extern struct vdso_data *vdso_data;
> diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
> index 462dcd0c1700..77624b38bdb9 100644
> --- a/arch/tile/kernel/time.c
> +++ b/arch/tile/kernel/time.c
> @@ -249,13 +249,10 @@ cycles_t ns2cycles(unsigned long nsecs)
>
> void update_vsyscall_tz(void)
> {
> - /* Userspace gettimeofday will spin while this value is odd. */
> - ++vdso_data->tz_update_count;
> - smp_wmb();
> + write_seqcount_begin(&vdso_data->tz_seq);
> vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
> vdso_data->tz_dsttime = sys_tz.tz_dsttime;
> - smp_wmb();
> - ++vdso_data->tz_update_count;
> + write_seqcount_end(&vdso_data->tz_seq);
> }
>
> void update_vsyscall(struct timekeeper *tk)
> @@ -263,20 +260,30 @@ void update_vsyscall(struct timekeeper *tk)
> struct timespec wall_time = tk_xtime(tk);

wall_time was removed by dc01c9fa ("tile: Convert VDSO timekeeping to the
precise mechanism"). This causes hunk to fail.

> struct timespec *wtm = &tk->wall_to_monotonic;
> struct clocksource *clock = tk->clock;
> + struct timespec ts;
>
> if (clock != &cycle_counter_cs)
> return;
>
> - /* Userspace gettimeofday will spin while this value is odd. */
> - ++vdso_data->tb_update_count;
> - smp_wmb();
> + write_seqcount_begin(&vdso_data->tb_seq);
> +
> vdso_data->xtime_tod_stamp = clock->cycle_last;
> vdso_data->xtime_clock_sec = wall_time.tv_sec;
> vdso_data->xtime_clock_nsec = wall_time.tv_nsec;
> - vdso_data->wtom_clock_sec = wtm->tv_sec;
> - vdso_data->wtom_clock_nsec = wtm->tv_nsec;
> +
> + ts = timespec_add(wall_time, *wtm);

could you do something like

ts = timespec_add(&tk->wall_to_monotonic, wtm);

instead?

> + vdso_data->wtom_clock_sec = ts.tv_sec;
> + vdso_data->wtom_clock_nsec = ts.tv_nsec;
> +
> vdso_data->mult = clock->mult;
> vdso_data->shift = clock->shift;
> - smp_wmb();
> - ++vdso_data->tb_update_count;
> +
> + ts = __current_kernel_time();
> + vdso_data->xtime_clock_coarse_sec = ts.tv_sec;
> + vdso_data->xtime_clock_coarse_nsec = ts.tv_nsec;
> + ts = timespec_add(ts, *wtm);
> + vdso_data->wtom_clock_coarse_sec = ts.tv_sec;
> + vdso_data->wtom_clock_coarse_nsec = ts.tv_nsec;
> +
> + write_seqcount_end(&vdso_data->tb_seq);
> }
> diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S
> index 041cd6c39c83..731529f3f06f 100644
> --- a/arch/tile/kernel/vdso/vdso.lds.S
> +++ b/arch/tile/kernel/vdso/vdso.lds.S
> @@ -82,6 +82,8 @@ VERSION
> __vdso_rt_sigreturn;
> __vdso_gettimeofday;
> gettimeofday;
> + __vdso_clock_gettime;
> + clock_gettime;
> local:*;
> };
> }
> diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
> index 51ec8e46f5f9..a09043492d1e 100644
> --- a/arch/tile/kernel/vdso/vgettimeofday.c
> +++ b/arch/tile/kernel/vdso/vgettimeofday.c
> @@ -15,8 +15,14 @@
> #define VDSO_BUILD /* avoid some shift warnings for -m32 in <asm/page.h> */
> #include <linux/time.h>
> #include <asm/timex.h>
> +#include <asm/unistd.h>
> #include <asm/vdso.h>
>
> +struct syscall_return_value {
> + long value;
> + long error;
> +};
> +
> #if CHIP_HAS_SPLIT_CYCLE()
> static inline cycles_t get_cycles_inline(void)
> {
> @@ -50,58 +56,136 @@ inline unsigned long get_datapage(void)
> return ret;
> }
>
> -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
> +static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
> +{
> + int count;
> + cycles_t cycles;
> + unsigned long ns;
> +
> + do {
> + count = read_seqcount_begin(&vdso->tb_seq);
> + cycles = get_cycles() - vdso->xtime_tod_stamp;
> + ns = (cycles * vdso->mult) >> vdso->shift;
> + ts->tv_sec = vdso->xtime_clock_sec;
> + ts->tv_nsec = vdso->xtime_clock_nsec;
> + } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
> +
> + timespec_add_ns(ts, ns);
> + return 0;
> +}
> +
> +static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
> {
> + int count;
> cycles_t cycles;
> - unsigned long count, sec, ns;
> - volatile struct vdso_data *vdso_data;
> + unsigned long ns;
> +
> + do {
> + count = read_seqcount_begin(&vdso->tb_seq);
> + cycles = get_cycles() - vdso->xtime_tod_stamp;
> + ns = (cycles * vdso->mult) >> vdso->shift;
> + ts->tv_sec = vdso->wtom_clock_sec;
> + ts->tv_nsec = vdso->wtom_clock_nsec;
> + } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
> +
> + timespec_add_ns(ts, ns);
> + return 0;
> +}
> +
> +static inline int do_realtime_coarse(struct vdso_data *vdso,
> + struct timespec *ts)
> +{
> + unsigned long count;
> +
> + do {
> + count = read_seqcount_begin(&vdso->tb_seq);
> + ts->tv_sec = vdso->xtime_clock_coarse_sec;
> + ts->tv_nsec = vdso->xtime_clock_coarse_nsec;
> + } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
> +
> + return 0;
> +}
> +
> +static inline int do_monotonic_coarse(struct vdso_data *vdso,
> + struct timespec *ts)
> +{
> + unsigned long count;
> +
> + do {
> + count = read_seqcount_begin(&vdso->tb_seq);
> + ts->tv_sec = vdso->wtom_clock_coarse_sec;
> + ts->tv_nsec = vdso->wtom_clock_coarse_nsec;
> + } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
> +
> + return 0;
> +}
> +
> +struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
> + struct timezone *tz)
> +{
> + struct syscall_return_value ret = { 0, 0 };
> + unsigned long count;
> + struct vdso_data *vdso = (struct vdso_data *)get_datapage();
>
> - vdso_data = (struct vdso_data *)get_datapage();
> /* The use of the timezone is obsolete, normally tz is NULL. */
> if (unlikely(tz != NULL)) {
> - while (1) {
> - /* Spin until the update finish. */
> - count = vdso_data->tz_update_count;
> - if (count & 1)
> - continue;
> -
> - tz->tz_minuteswest = vdso_data->tz_minuteswest;
> - tz->tz_dsttime = vdso_data->tz_dsttime;
> -
> - /* Check whether updated, read again if so. */
> - if (count == vdso_data->tz_update_count)
> - break;
> - }
> + do {
> + count = read_seqcount_begin(&vdso->tz_seq);
> + tz->tz_minuteswest = vdso->tz_minuteswest;
> + tz->tz_dsttime = vdso->tz_dsttime;
> + } while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
> }
>
> if (unlikely(tv == NULL))
> - return 0;
> -
> - while (1) {
> - /* Spin until the update finish. */
> - count = vdso_data->tb_update_count;
> - if (count & 1)
> - continue;
> -
> - cycles = (get_cycles() - vdso_data->xtime_tod_stamp);
> - ns = (cycles * vdso_data->mult) >> vdso_data->shift;
> - sec = vdso_data->xtime_clock_sec;
> - ns += vdso_data->xtime_clock_nsec;
> - if (ns >= NSEC_PER_SEC) {
> - ns -= NSEC_PER_SEC;
> - sec += 1;
> - }
> -
> - /* Check whether updated, read again if so. */
> - if (count == vdso_data->tb_update_count)
> - break;
> - }
> + return ret;
>
> - tv->tv_sec = sec;
> - tv->tv_usec = ns / 1000;
> + do_realtime(vdso, (struct timespec *)tv);
> + tv->tv_usec /= 1000;
>
> - return 0;
> + return ret;
> }
>
> int gettimeofday(struct timeval *tv, struct timezone *tz)
> __attribute__((weak, alias("__vdso_gettimeofday")));
> +
> +static struct syscall_return_value vdso_fallback_gettime(long clock,
> + struct timespec *ts)
> +{
> + struct syscall_return_value ret;
> + __asm__ __volatile__ (
> + "swint1"
> + : "=R00" (ret.value), "=R01" (ret.error)
> + : "R10" (__NR_clock_gettime), "R00" (clock), "R01" (ts)
> + : "r2", "r3", "r4", "r5", "r6", "r7",
> + "r8", "r9", "r11", "r12", "r13", "r14", "r15",
> + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
> + "r24", "r25", "r26", "r27", "r28", "r29", "memory");
> + return ret;
> +}
> +
> +struct syscall_return_value __vdso_clock_gettime(clockid_t clock,
> + struct timespec *ts)
> +{
> + struct vdso_data *vdso = (struct vdso_data *)get_datapage();
> + struct syscall_return_value ret = { 0, 0 };
> +
> + switch (clock) {
> + case CLOCK_REALTIME:
> + do_realtime(vdso, ts);
> + return ret;
> + case CLOCK_MONOTONIC:
> + do_monotonic(vdso, ts);
> + return ret;
> + case CLOCK_REALTIME_COARSE:
> + do_realtime_coarse(vdso, ts);
> + return ret;
> + case CLOCK_MONOTONIC_COARSE:
> + do_monotonic_coarse(vdso, ts);
> + return ret;
> + default:
> + return vdso_fallback_gettime(clock, ts);
> + }
> +}
> +
> +int clock_gettime(clockid_t clock, struct timespec *ts)
> + __attribute__((weak, alias("__vdso_clock_gettime")));
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Henrik Austad


2014-10-02 14:17:20

by Chris Metcalf

[permalink] [raw]
Subject: Re: [PATCH] tile: add clock_gettime support to vDSO

On 10/1/2014 5:24 PM, Henrik Austad wrote:
> On Tue, Sep 30, 2014 at 03:29:45PM -0400, Chris Metcalf wrote:
>> > This change adds support for clock_gettime with CLOCK_REALTIME
>> > and CLOCK_MONOTONIC using vDSO. In addition, with this change
>> > we switch to use seqlocks instead of integer counters.
>> >
>> > We also support the *_COARSE clockid_t, for apps that want speed
>> > but aren't concerned about fine-grained timestamps; this saves
>> > about 20 cycles per call (see http://lwn.net/Articles/342018/).
>> >
>> > Signed-off-by: Chris Metcalf <[email protected]>
> What rev is this based on? When I apply it to Linus latest (aad7fb916), it
> fails in arch/tile/kernel/time.c and arch/tile/kernel/vdso/vgettimeofday.c
> (see below)

It's based on the linux-tile tree (on kernel.org under cmetcalf/linux-tile.git), which was last synced up at 3.16-rc7. I'll refresh the tree to pick up Thomas's recent changes that were queued for 3.17. Thanks.

--
Chris Metcalf, Tilera Corp.
http://www.tilera.com

2014-10-02 15:30:56

by Chris Metcalf

[permalink] [raw]
Subject: [PATCH v3 1/2] tile: switch to using seqlocks for the vDSO time code

Signed-off-by: Chris Metcalf <[email protected]>
---
arch/tile/include/asm/vdso.h | 5 ++--
arch/tile/kernel/time.c | 16 +++++-------
arch/tile/kernel/vdso/vgettimeofday.c | 47 +++++++++++------------------------
3 files changed, 24 insertions(+), 44 deletions(-)

v2: split apart seqlock and new functionality into separate commits
v3: rebase onto 3.17-rc7

diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
index 9f6a78d665fa..d64b0d58a7e9 100644
--- a/arch/tile/include/asm/vdso.h
+++ b/arch/tile/include/asm/vdso.h
@@ -15,6 +15,7 @@
#ifndef __TILE_VDSO_H__
#define __TILE_VDSO_H__

+#include <linux/seqlock.h>
#include <linux/types.h>

/*
@@ -26,8 +27,8 @@
*/

struct vdso_data {
- __u64 tz_update_count; /* Timezone atomicity ctr */
- __u64 tb_update_count; /* Timebase atomicity ctr */
+ seqcount_t tz_seq; /* Timezone seqlock */
+ seqcount_t tb_seq; /* Timebase seqlock */
__u64 xtime_tod_stamp; /* TOD clock for xtime */
__u64 xtime_clock_sec; /* Kernel time second */
__u64 xtime_clock_nsec; /* Kernel time nanosecond */
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d8fbc289e680..2fe8323db77e 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -249,13 +249,10 @@ cycles_t ns2cycles(unsigned long nsecs)

void update_vsyscall_tz(void)
{
- /* Userspace gettimeofday will spin while this value is odd. */
- ++vdso_data->tz_update_count;
- smp_wmb();
+ write_seqcount_begin(&vdso_data->tz_seq);
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
- smp_wmb();
- ++vdso_data->tz_update_count;
+ write_seqcount_end(&vdso_data->tz_seq);
}

void update_vsyscall(struct timekeeper *tk)
@@ -266,9 +263,8 @@ void update_vsyscall(struct timekeeper *tk)
if (clock != &cycle_counter_cs)
return;

- /* Userspace gettimeofday will spin while this value is odd. */
- ++vdso_data->tb_update_count;
- smp_wmb();
+ write_seqcount_begin(&vdso_data->tb_seq);
+
vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
@@ -276,6 +272,6 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_clock_nsec = wtm->tv_nsec;
vdso_data->mult = tk->tkr.mult;
vdso_data->shift = tk->tkr.shift;
- smp_wmb();
- ++vdso_data->tb_update_count;
+
+ write_seqcount_end(&vdso_data->tb_seq);
}
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
index e933fb9fbf5c..7cff8fbac4f0 100644
--- a/arch/tile/kernel/vdso/vgettimeofday.c
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -53,50 +53,33 @@ inline unsigned long get_datapage(void)
int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
cycles_t cycles;
- unsigned long count, sec, ns;
- volatile struct vdso_data *vdso_data;
+ unsigned count;
+ unsigned long sec, ns;
+ struct vdso_data *vdso = (struct vdso_data *)get_datapage();

- vdso_data = (struct vdso_data *)get_datapage();
/* The use of the timezone is obsolete, normally tz is NULL. */
if (unlikely(tz != NULL)) {
- while (1) {
- /* Spin until the update finish. */
- count = vdso_data->tz_update_count;
- if (count & 1)
- continue;
-
- tz->tz_minuteswest = vdso_data->tz_minuteswest;
- tz->tz_dsttime = vdso_data->tz_dsttime;
-
- /* Check whether updated, read again if so. */
- if (count == vdso_data->tz_update_count)
- break;
- }
+ do {
+ count = read_seqcount_begin(&vdso->tz_seq);
+ tz->tz_minuteswest = vdso->tz_minuteswest;
+ tz->tz_dsttime = vdso->tz_dsttime;
+ } while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
}

if (unlikely(tv == NULL))
return 0;

- while (1) {
- /* Spin until the update finish. */
- count = vdso_data->tb_update_count;
- if (count & 1)
- continue;
-
- sec = vdso_data->xtime_clock_sec;
- cycles = get_cycles() - vdso_data->xtime_tod_stamp;
- ns = (cycles * vdso_data->mult) + vdso_data->xtime_clock_nsec;
- ns >>= vdso_data->shift;
-
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ sec = vdso->xtime_clock_sec;
+ cycles = get_cycles() - vdso->xtime_tod_stamp;
+ ns = (cycles * vdso->mult) + vdso->xtime_clock_nsec;
+ ns >>= vdso->shift;
if (ns >= NSEC_PER_SEC) {
ns -= NSEC_PER_SEC;
sec += 1;
}
-
- /* Check whether updated, read again if so. */
- if (count == vdso_data->tb_update_count)
- break;
- }
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));

tv->tv_sec = sec;
tv->tv_usec = ns / 1000;
--
1.8.3.1

2014-10-02 15:31:09

by Chris Metcalf

[permalink] [raw]
Subject: [PATCH v3 2/2] tile: add clock_gettime support to vDSO

This change adds support for clock_gettime with CLOCK_REALTIME
and CLOCK_MONOTONIC using vDSO. It also updates the vdso
struct nomenclature used for the clocks to match the x86 code
to keep it easier to update going forward.

We also support the *_COARSE clockid_t, for apps that want speed
but aren't concerned about fine-grained timestamps; this saves
about 20 cycles per call (see http://lwn.net/Articles/342018/).

Signed-off-by: Chris Metcalf <[email protected]>
---
arch/tile/include/asm/vdso.h | 15 ++--
arch/tile/kernel/time.c | 45 ++++++++---
arch/tile/kernel/vdso/vdso.lds.S | 2 +
arch/tile/kernel/vdso/vgettimeofday.c | 145 +++++++++++++++++++++++++++++-----
4 files changed, 172 insertions(+), 35 deletions(-)

v2: use the x86 code as a model to address both John Stultz's
observation about sub-nanosecond updates, as well as to address
Thomas Gleixner's observation about minimizing time spent with
the seqlock held. Note that I kept the "unlikely" that Thomas
doubted would help, just for parallelism with the x86 code.
v3: rebase onto 3.17-rc7

diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
index d64b0d58a7e9..9b069692153f 100644
--- a/arch/tile/include/asm/vdso.h
+++ b/arch/tile/include/asm/vdso.h
@@ -29,13 +29,18 @@
struct vdso_data {
seqcount_t tz_seq; /* Timezone seqlock */
seqcount_t tb_seq; /* Timebase seqlock */
- __u64 xtime_tod_stamp; /* TOD clock for xtime */
- __u64 xtime_clock_sec; /* Kernel time second */
- __u64 xtime_clock_nsec; /* Kernel time nanosecond */
- __u64 wtom_clock_sec; /* Wall to monotonic clock second */
- __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */
+ __u64 cycle_last; /* TOD clock for xtime */
+ __u64 mask; /* Cycle mask */
__u32 mult; /* Cycle to nanosecond multiplier */
__u32 shift; /* Cycle to nanosecond divisor (power of two) */
+ __u64 wall_time_sec;
+ __u64 wall_time_snsec;
+ __u64 monotonic_time_sec;
+ __u64 monotonic_time_snsec;
+ __u64 wall_time_coarse_sec;
+ __u64 wall_time_coarse_nsec;
+ __u64 monotonic_time_coarse_sec;
+ __u64 monotonic_time_coarse_nsec;
__u32 tz_minuteswest; /* Minutes west of Greenwich */
__u32 tz_dsttime; /* Type of dst correction */
};
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index 2fe8323db77e..c1b362277fb7 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,21 +257,44 @@ void update_vsyscall_tz(void)

void update_vsyscall(struct timekeeper *tk)
{
- struct timespec *wtm = &tk->wall_to_monotonic;
- struct clocksource *clock = tk->tkr.clock;
-
- if (clock != &cycle_counter_cs)
+ if (tk->tkr.clock != &cycle_counter_cs)
return;

write_seqcount_begin(&vdso_data->tb_seq);

- vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->mult = tk->tkr.mult;
- vdso_data->shift = tk->tkr.shift;
+ vdso_data->cycle_last = tk->tkr.cycle_last;
+ vdso_data->mask = tk->tkr.mask;
+ vdso_data->mult = tk->tkr.mult;
+ vdso_data->shift = tk->tkr.shift;
+
+ vdso_data->wall_time_sec = tk->xtime_sec;
+ vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
+
+ vdso_data->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+ + ((u64)tk->wall_to_monotonic.tv_nsec
+ << tk->tkr.shift);
+ while (vdso_data->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+ vdso_data->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+ vdso_data->monotonic_time_sec++;
+ }
+
+ vdso_data->wall_time_coarse_sec = tk->xtime_sec;
+ vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
+ tk->tkr.shift);
+
+ vdso_data->monotonic_time_coarse_sec =
+ vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->monotonic_time_coarse_nsec =
+ vdso_data->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+
+ while (vdso_data->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+ vdso_data->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+ vdso_data->monotonic_time_coarse_sec++;
+ }

write_seqcount_end(&vdso_data->tb_seq);
}
diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S
index 041cd6c39c83..731529f3f06f 100644
--- a/arch/tile/kernel/vdso/vdso.lds.S
+++ b/arch/tile/kernel/vdso/vdso.lds.S
@@ -82,6 +82,8 @@ VERSION
__vdso_rt_sigreturn;
__vdso_gettimeofday;
gettimeofday;
+ __vdso_clock_gettime;
+ clock_gettime;
local:*;
};
}
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
index 7cff8fbac4f0..8bb21eda07d8 100644
--- a/arch/tile/kernel/vdso/vgettimeofday.c
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -15,6 +15,7 @@
#define VDSO_BUILD /* avoid some shift warnings for -m32 in <asm/page.h> */
#include <linux/time.h>
#include <asm/timex.h>
+#include <asm/unistd.h>
#include <asm/vdso.h>

#if CHIP_HAS_SPLIT_CYCLE()
@@ -35,6 +36,11 @@ static inline cycles_t get_cycles_inline(void)
#define get_cycles get_cycles_inline
#endif

+struct syscall_return_value {
+ long value;
+ long error;
+};
+
/*
* Find out the vDSO data page address in the process address space.
*/
@@ -50,11 +56,82 @@ inline unsigned long get_datapage(void)
return ret;
}

-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+static inline u64 vgetsns(struct vdso_data *vdso)
+{
+ return ((get_cycles() - vdso->cycle_last) & vdso->mask) * vdso->mult;
+}
+
+static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
+{
+ unsigned count;
+ u64 ns;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->wall_time_sec;
+ ns = vdso->wall_time_snsec;
+ ns += vgetsns(vdso);
+ ns >>= vdso->shift;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
+{
+ unsigned count;
+ u64 ns;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->monotonic_time_sec;
+ ns = vdso->monotonic_time_snsec;
+ ns += vgetsns(vdso);
+ ns >>= vdso->shift;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+static inline int do_realtime_coarse(struct vdso_data *vdso,
+ struct timespec *ts)
+{
+ unsigned count;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->wall_time_coarse_sec;
+ ts->tv_nsec = vdso->wall_time_coarse_nsec;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ return 0;
+}
+
+static inline int do_monotonic_coarse(struct vdso_data *vdso,
+ struct timespec *ts)
{
- cycles_t cycles;
unsigned count;
- unsigned long sec, ns;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->monotonic_time_coarse_sec;
+ ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ return 0;
+}
+
+struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
+ struct timezone *tz)
+{
+ struct syscall_return_value ret = { 0, 0 };
+ unsigned count;
struct vdso_data *vdso = (struct vdso_data *)get_datapage();

/* The use of the timezone is obsolete, normally tz is NULL. */
@@ -67,25 +144,55 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
}

if (unlikely(tv == NULL))
- return 0;
+ return ret;

- do {
- count = read_seqcount_begin(&vdso->tb_seq);
- sec = vdso->xtime_clock_sec;
- cycles = get_cycles() - vdso->xtime_tod_stamp;
- ns = (cycles * vdso->mult) + vdso->xtime_clock_nsec;
- ns >>= vdso->shift;
- if (ns >= NSEC_PER_SEC) {
- ns -= NSEC_PER_SEC;
- sec += 1;
- }
- } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+ do_realtime(vdso, (struct timespec *)tv);
+ tv->tv_usec /= 1000;

- tv->tv_sec = sec;
- tv->tv_usec = ns / 1000;
-
- return 0;
+ return ret;
}

int gettimeofday(struct timeval *tv, struct timezone *tz)
__attribute__((weak, alias("__vdso_gettimeofday")));
+
+static struct syscall_return_value vdso_fallback_gettime(long clock,
+ struct timespec *ts)
+{
+ struct syscall_return_value ret;
+ __asm__ __volatile__ (
+ "swint1"
+ : "=R00" (ret.value), "=R01" (ret.error)
+ : "R10" (__NR_clock_gettime), "R00" (clock), "R01" (ts)
+ : "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "memory");
+ return ret;
+}
+
+struct syscall_return_value __vdso_clock_gettime(clockid_t clock,
+ struct timespec *ts)
+{
+ struct vdso_data *vdso = (struct vdso_data *)get_datapage();
+ struct syscall_return_value ret = { 0, 0 };
+
+ switch (clock) {
+ case CLOCK_REALTIME:
+ do_realtime(vdso, ts);
+ return ret;
+ case CLOCK_MONOTONIC:
+ do_monotonic(vdso, ts);
+ return ret;
+ case CLOCK_REALTIME_COARSE:
+ do_realtime_coarse(vdso, ts);
+ return ret;
+ case CLOCK_MONOTONIC_COARSE:
+ do_monotonic_coarse(vdso, ts);
+ return ret;
+ default:
+ return vdso_fallback_gettime(clock, ts);
+ }
+}
+
+int clock_gettime(clockid_t clock, struct timespec *ts)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
--
1.8.3.1

2014-10-02 17:41:51

by John Stultz

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] tile: add clock_gettime support to vDSO

On Thu, Oct 2, 2014 at 7:32 AM, Chris Metcalf <[email protected]> wrote:
> This change adds support for clock_gettime with CLOCK_REALTIME
> and CLOCK_MONOTONIC using vDSO. It also updates the vdso
> struct nomenclature used for the clocks to match the x86 code
> to keep it easier to update going forward.
>
> We also support the *_COARSE clockid_t, for apps that want speed
> but aren't concerned about fine-grained timestamps; this saves
> about 20 cycles per call (see http://lwn.net/Articles/342018/).
>
> Signed-off-by: Chris Metcalf <[email protected]>
> ---
> arch/tile/include/asm/vdso.h | 15 ++--
> arch/tile/kernel/time.c | 45 ++++++++---
> arch/tile/kernel/vdso/vdso.lds.S | 2 +
> arch/tile/kernel/vdso/vgettimeofday.c | 145 +++++++++++++++++++++++++++++-----
> 4 files changed, 172 insertions(+), 35 deletions(-)
>
> v2: use the x86 code as a model to address both John Stultz's
> observation about sub-nanosecond updates, as well as to address
> Thomas Gleixner's observation about minimizing time spent with
> the seqlock held. Note that I kept the "unlikely" that Thomas
> doubted would help, just for parallelism with the x86 code.
> v3: rebase onto 3.17-rc7


Looks ok to me.

Acked-by: John Stultz <[email protected]>

thanks
-john