Some user space applications need to read several different clocks.
Each read requires a transition from user space to kernel space.
Reading each clock separately (one syscall per clock) introduces extra
unpredictable/unmeasurable delay. Minimizing this delay benefits user
space actions on these clocks (e.g. synchronization).
Introduce a new system call, clock_compare, which can be used to measure
the offset between two clocks of a variety of types: PHC, virtual PHC
and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
The system call returns the timestamps of both clocks.
When possible, use crosstimespec to capture both values synchronously.
Otherwise, read clock A twice (before and after reading clock B) and average
these two times – to be as close as possible to the time at which clock B was read.
Signed-off-by: Sagi Maimon <[email protected]>
---
Addressed comments from:
- Richard Cochran : https://www.spinics.net/lists/netdev/msg964410.html
Changes since version 5:
- take only two clocks time samples
- use crosstimespec if supported
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
drivers/ptp/ptp_clock.c | 34 ++++--
include/linux/posix-clock.h | 2 +
include/linux/syscalls.h | 4 +
include/uapi/asm-generic/unistd.h | 5 +-
kernel/time/posix-clock.c | 25 +++++
kernel/time/posix-timers.c | 145 +++++++++++++++++++++++++
kernel/time/posix-timers.h | 2 +
8 files changed, 207 insertions(+), 11 deletions(-)
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 7e8d46f4147f..727930d27e05 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -383,6 +383,7 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common clock_compare sys_clock_compare
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 15b804ba4868..37ce66d4159f 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -156,17 +156,31 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
return err;
}
+static int ptp_clock_getcrosstime(struct posix_clock *pc, struct system_device_crosststamp *xtstamp)
+{
+ struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+ int err;
+
+ if (!ptp->info->getcrosststamp)
+ err = -EOPNOTSUPP;
+ else
+ err = ptp->info->getcrosststamp(ptp->info, xtstamp);
+
+ return err;
+}
+
static struct posix_clock_operations ptp_clock_ops = {
- .owner = THIS_MODULE,
- .clock_adjtime = ptp_clock_adjtime,
- .clock_gettime = ptp_clock_gettime,
- .clock_getres = ptp_clock_getres,
- .clock_settime = ptp_clock_settime,
- .ioctl = ptp_ioctl,
- .open = ptp_open,
- .release = ptp_release,
- .poll = ptp_poll,
- .read = ptp_read,
+ .owner = THIS_MODULE,
+ .clock_adjtime = ptp_clock_adjtime,
+ .clock_gettime = ptp_clock_gettime,
+ .clock_getres = ptp_clock_getres,
+ .clock_settime = ptp_clock_settime,
+ .clock_getcrosstime = ptp_clock_getcrosstime,
+ .ioctl = ptp_ioctl,
+ .open = ptp_open,
+ .release = ptp_release,
+ .poll = ptp_poll,
+ .read = ptp_read,
};
static void ptp_clock_release(struct device *dev)
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index ef8619f48920..3a5b4bb3f56b 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -47,6 +47,8 @@ struct posix_clock_operations {
int (*clock_settime)(struct posix_clock *pc,
const struct timespec64 *ts);
+ int (*clock_getcrosstime)(struct posix_clock *pc,
+ struct system_device_crosststamp *xtstamp);
/*
* Optional character device methods:
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 77eb9b0e7685..ba2ce5b927aa 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1188,6 +1188,10 @@ asmlinkage long sys_ni_syscall(void);
asmlinkage long sys_ni_posix_timers(void);
+asmlinkage long clock_compare(const clockid_t clock_a, const clockid_t clock_b,
+ struct __kernel_timespec __user *tp_a,
+ struct __kernel_timespec __user *tp_b,
+ int64_t __user *offs_err);
/*
* Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly.
* Instead, use one of the functions which work equivalently, such as
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 75f00965ab15..537a35afd237 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -842,8 +842,11 @@ __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
#define __NR_lsm_list_modules 461
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+#define __NR_clock_compare 462
+__SYSCALL(__NR_clock_compare, sys_clock_compare)
+
#undef __NR_syscalls
-#define __NR_syscalls 462
+#define __NR_syscalls 463
/*
* 32 bit systems traditionally used different
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 9de66bbbb3d1..68b2d6741036 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -327,9 +327,34 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts)
return err;
}
+static int pc_clock_get_crosstime(clockid_t id, struct system_device_crosststamp *xtstamp)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (cd.clk->ops.clock_getcrosstime)
+ err = cd.clk->ops.clock_getcrosstime(cd.clk, xtstamp);
+ else
+ err = -EOPNOTSUPP;
+out:
+ put_clock_desc(&cd);
+
+ return err;
+}
+
const struct k_clock clock_posix_dynamic = {
.clock_getres = pc_clock_getres,
.clock_set = pc_clock_settime,
.clock_get_timespec = pc_clock_gettime,
.clock_adj = pc_clock_adjtime,
+ .clock_get_crosstimespec = pc_clock_get_crosstime,
};
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b924f0f096fa..ed082664774b 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1426,6 +1426,151 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
#endif
+/**
+ * clock_compare - Get couple of clocks time stamps
+ * @clock_a: clock a ID
+ * @clock_b: clock b ID
+ * @tp_a: Pointer to a user space timespec64 for clock a storage
+ * @tp_b: Pointer to a user space timespec64 for clock b storage
+ *
+ * clock_compare gets time sample of two clocks.
+ * Supported clocks IDs: PHC, virtual PHC and various system clocks.
+ *
+ * In case of PHC that supports crosstimespec and the other clock is Monotonic raw
+ * or system time, crosstimespec will be used to synchronously capture
+ * system/device time stamp.
+ *
+ * In other cases: Read clock_a twice (before, and after reading clock_b) and
+ * average these times – to be as close as possible to the time we read clock_b.
+ *
+ * Returns:
+ * 0 Success. @tp_a and @tp_b contains the time stamps
+ * -EINVAL @clock a or b ID is not a valid clock ID
+ * -EFAULT Copying the time stamp to @tp_a or @tp_b faulted
+ * -EOPNOTSUPP Dynamic POSIX clock does not support crosstimespec()
+ **/
+SYSCALL_DEFINE5(clock_compare, const clockid_t, clock_a, const clockid_t, clock_b,
+ struct __kernel_timespec __user *, tp_a, struct __kernel_timespec __user *,
+ tp_b, int64_t __user *, offs_err)
+{
+ struct timespec64 ts_a1, ts_b, ts_a2;
+ struct system_device_crosststamp xtstamp_a1, xtstamp_a2, xtstamp_b;
+ const struct k_clock *kc_a, *kc_b;
+ ktime_t ktime_a, ktime_a1, ktime_a2;
+ s64 ts_offs, ts_offs_err = 0;
+ int error = 0;
+ bool crosstime_support_a = false;
+ bool crosstime_support_b = false;
+
+ kc_a = clockid_to_kclock(clock_a);
+ if (!kc_a) {
+ error = -EINVAL;
+ return error;
+ }
+
+ kc_b = clockid_to_kclock(clock_b);
+ if (!kc_b) {
+ error = -EINVAL;
+ return error;
+ }
+
+ // In case crosstimespec supported and b clock is Monotonic raw or system
+ // time, synchronously capture system/device time stamp
+ if (clock_a < 0) {
+ error = kc_a->clock_get_crosstimespec(clock_a, &xtstamp_a1);
+ if (!error) {
+ if (clock_b == CLOCK_MONOTONIC_RAW) {
+ ts_b = ktime_to_timespec64(xtstamp_a1.sys_monoraw);
+ ts_a1 = ktime_to_timespec64(xtstamp_a1.device);
+ goto out;
+ } else if (clock_b == CLOCK_REALTIME) {
+ ts_b = ktime_to_timespec64(xtstamp_a1.sys_realtime);
+ ts_a1 = ktime_to_timespec64(xtstamp_a1.device);
+ goto out;
+ } else {
+ crosstime_support_a = true;
+ }
+ }
+ }
+
+ // In case crosstimespec supported and a clock is Monotonic raw or system
+ // time, synchronously capture system/device time stamp
+ if (clock_b < 0) {
+ // Synchronously capture system/device time stamp
+ error = kc_b->clock_get_crosstimespec(clock_b, &xtstamp_b);
+ if (!error) {
+ if (clock_a == CLOCK_MONOTONIC_RAW) {
+ ts_a1 = ktime_to_timespec64(xtstamp_b.sys_monoraw);
+ ts_b = ktime_to_timespec64(xtstamp_b.device);
+ goto out;
+ } else if (clock_a == CLOCK_REALTIME) {
+ ts_a1 = ktime_to_timespec64(xtstamp_b.sys_realtime);
+ ts_b = ktime_to_timespec64(xtstamp_b.device);
+ goto out;
+ } else {
+ crosstime_support_b = true;
+ }
+ }
+ }
+
+ if (crosstime_support_a)
+ error = kc_a->clock_get_crosstimespec(clock_a, &xtstamp_a1);
+ else
+ error = kc_a->clock_get_timespec(clock_a, &ts_a1);
+
+ if (error)
+ return error;
+
+ if (crosstime_support_b)
+ error = kc_b->clock_get_crosstimespec(clock_b, &xtstamp_b);
+ else
+ error = kc_b->clock_get_timespec(clock_b, &ts_b);
+
+ if (error)
+ return error;
+
+ if (crosstime_support_a)
+ error = kc_a->clock_get_crosstimespec(clock_a, &xtstamp_a2);
+ else
+ error = kc_a->clock_get_timespec(clock_a, &ts_a2);
+
+ if (error)
+ return error;
+
+ if (crosstime_support_a) {
+ ktime_a1 = xtstamp_a1.device;
+ ktime_a2 = xtstamp_a2.device;
+ } else {
+ ktime_a1 = timespec64_to_ktime(ts_a1);
+ ktime_a2 = timespec64_to_ktime(ts_a2);
+ }
+
+ ktime_a = ktime_add(ktime_a1, ktime_a2);
+
+ ts_offs = ktime_divns(ktime_a, 2);
+
+ ts_a1 = ns_to_timespec64(ts_offs);
+
+ ktime_a = ktime_sub(ktime_a2, ktime_a1);
+
+ ts_offs_err = ktime_divns(ktime_a, 2);
+
+ if (crosstime_support_b)
+ ts_b = ktime_to_timespec64(xtstamp_a2.device);
+
+out:
+ if (put_timespec64(&ts_a1, tp_a))
+ error = -EFAULT;
+
+ if (!error && put_timespec64(&ts_b, tp_b))
+ error = -EFAULT;
+
+ if (!error && copy_to_user(offs_err, &ts_offs_err, sizeof(ts_offs_err)))
+ error = -EFAULT;
+
+ return error;
+}
+
static const struct k_clock clock_realtime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_realtime_timespec,
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index f32a2ebba9b8..b1f6075f35bb 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -11,6 +11,8 @@ struct k_clock {
struct timespec64 *tp);
/* Returns the clock value in the root time namespace. */
ktime_t (*clock_get_ktime)(const clockid_t which_clock);
+ int (*clock_get_crosstimespec)(const clockid_t which_clock,
+ struct system_device_crosststamp *xtstamp);
int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx);
int (*timer_create)(struct k_itimer *timer);
int (*nsleep)(const clockid_t which_clock, int flags,
--
2.26.3
On Tue, Mar 12, 2024, at 10:50, Sagi Maimon wrote:
> Some user space applications need to read a couple of different clocks.
> Each read requires moving from user space to kernel space.
> Reading each clock separately (syscall) introduces extra
> unpredictable/unmeasurable delay. Minimizing this delay contributes to user
> space actions on these clocks (e.g. synchronization etc).
>
> Introduce a new system call clock_compare, which can be used to measure
> the offset between two clocks, from variety of types: PHC, virtual PHC
> and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> The system call returns the clocks timestamps.
>
> When possible, use crosstimespec to sync read values.
> Else, read clock A twice (before, and after reading clock B) and average these
> times – to be as close as possible to the time we read clock B.
>
> Signed-off-by: Sagi Maimon <[email protected]>
I like this a lot better than the previous versions I looked at,
so just a few ideas here how this might be improved further.
> +/**
> + * clock_compare - Get couple of clocks time stamps
> + * @clock_a: clock a ID
> + * @clock_b: clock b ID
> + * @tp_a: Pointer to a user space timespec64 for clock a storage
> + * @tp_b: Pointer to a user space timespec64 for clock b storage
> + *
> + * clock_compare gets time sample of two clocks.
> + * Supported clocks IDs: PHC, virtual PHC and various system clocks.
> + *
> + * In case of PHC that supports crosstimespec and the other clock is
> Monotonic raw
> + * or system time, crosstimespec will be used to synchronously capture
> + * system/device time stamp.
> + *
> + * In other cases: Read clock_a twice (before, and after reading
> clock_b) and
> + * average these times – to be as close as possible to the time we
> read clock_b.
> + *
> + * Returns:
> + * 0 Success. @tp_a and @tp_b contains the time stamps
> + * -EINVAL @clock a or b ID is not a valid clock ID
> + * -EFAULT Copying the time stamp to @tp_a or @tp_b faulted
> + * -EOPNOTSUPP Dynamic POSIX clock does not support crosstimespec()
> + **/
> +SYSCALL_DEFINE5(clock_compare, const clockid_t, clock_a, const
> clockid_t, clock_b,
> + struct __kernel_timespec __user *, tp_a, struct __kernel_timespec
> __user *,
> + tp_b, int64_t __user *, offs_err)
The system call is well-formed in the way that the ABI is the
same across all supported architectures, good.
A minor issue is the use of int64_t, which in user interfaces
can cause namespace problems. Please change that to the kernel
side __s64 type.
> + kc_a = clockid_to_kclock(clock_a);
> + if (!kc_a) {
> + error = -EINVAL;
> + return error;
> + }
> +
> + kc_b = clockid_to_kclock(clock_b);
> + if (!kc_b) {
> + error = -EINVAL;
> + return error;
> + }
I'm not sure if we really need to have it generic enough to
support any combination of clocks here. It complicates the
implementation a bit but it also generalizes the user space
side of it.
Can you think of cases where you want to compare against
something other than CLOCK_MONOTONIC_RAW or CLOCK_REALTIME,
or are these going to be the ones that you expect to
be used anyway?
> + if (crosstime_support_a) {
> + ktime_a1 = xtstamp_a1.device;
> + ktime_a2 = xtstamp_a2.device;
> + } else {
> + ktime_a1 = timespec64_to_ktime(ts_a1);
> + ktime_a2 = timespec64_to_ktime(ts_a2);
> + }
> +
> + ktime_a = ktime_add(ktime_a1, ktime_a2);
> +
> + ts_offs = ktime_divns(ktime_a, 2);
> +
> + ts_a1 = ns_to_timespec64(ts_offs);
Converting nanoseconds to timespec64 is rather expensive,
so I wonder if this could be changed to something cheaper,
either by returning nanoseconds in the end and consistently
working on those, or by doing the calculation on the
timespec64 itself.
Arnd
Hi Arnd
Thanks for your comments.
On Tue, Mar 12, 2024 at 1:19 PM Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Mar 12, 2024, at 10:50, Sagi Maimon wrote:
> > Some user space applications need to read a couple of different clocks.
> > Each read requires moving from user space to kernel space.
> > Reading each clock separately (syscall) introduces extra
> > unpredictable/unmeasurable delay. Minimizing this delay contributes to user
> > space actions on these clocks (e.g. synchronization etc).
> >
> > Introduce a new system call clock_compare, which can be used to measure
> > the offset between two clocks, from variety of types: PHC, virtual PHC
> > and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> > The system call returns the clocks timestamps.
> >
> > When possible, use crosstimespec to sync read values.
> > Else, read clock A twice (before, and after reading clock B) and average these
> > times – to be as close as possible to the time we read clock B.
> >
> > Signed-off-by: Sagi Maimon <[email protected]>
>
> I like this a lot better than the previous versions I looked at,
> so just a few ideas here how this might be improved further.
>
> > +/**
> > + * clock_compare - Get couple of clocks time stamps
> > + * @clock_a: clock a ID
> > + * @clock_b: clock b ID
> > + * @tp_a: Pointer to a user space timespec64 for clock a storage
> > + * @tp_b: Pointer to a user space timespec64 for clock b storage
> > + *
> > + * clock_compare gets time sample of two clocks.
> > + * Supported clocks IDs: PHC, virtual PHC and various system clocks.
> > + *
> > + * In case of PHC that supports crosstimespec and the other clock is
> > Monotonic raw
> > + * or system time, crosstimespec will be used to synchronously capture
> > + * system/device time stamp.
> > + *
> > + * In other cases: Read clock_a twice (before, and after reading
> > clock_b) and
> > + * average these times – to be as close as possible to the time we
> > read clock_b.
> > + *
> > + * Returns:
> > + * 0 Success. @tp_a and @tp_b contains the time stamps
> > + * -EINVAL @clock a or b ID is not a valid clock ID
> > + * -EFAULT Copying the time stamp to @tp_a or @tp_b faulted
> > + * -EOPNOTSUPP Dynamic POSIX clock does not support crosstimespec()
> > + **/
> > +SYSCALL_DEFINE5(clock_compare, const clockid_t, clock_a, const
> > clockid_t, clock_b,
> > + struct __kernel_timespec __user *, tp_a, struct __kernel_timespec
> > __user *,
> > + tp_b, int64_t __user *, offs_err)
>
> The system call is well-formed in the way that the ABI is the
> same across all supported architectures, good.
>
> A minor issue is the use of int64_t, which in user interfaces
> can cause namespace problems. Please change that to the kernel
> side __s64 type.
>
You are right - it will be fixed in the next patch.
> > + kc_a = clockid_to_kclock(clock_a);
> > + if (!kc_a) {
> > + error = -EINVAL;
> > + return error;
> > + }
> > +
> > + kc_b = clockid_to_kclock(clock_b);
> > + if (!kc_b) {
> > + error = -EINVAL;
> > + return error;
> > + }
>
> I'm not sure if we really need to have it generic enough to
> support any combination of clocks here. It complicates the
> implementation a bit but it also generalizes the user space
> side of it.
>
> Can you think of cases where you want to compare against
> something other than CLOCK_MONOTONIC_RAW or CLOCK_REALTIME,
> or are these going to be the ones that you expect to
> be used anyway?
>
Sure, one example is syncing two different PHCs (which was originally
why we needed this syscall).
I hope that I have understood your note and that this answers your question.
> > + if (crosstime_support_a) {
> > + ktime_a1 = xtstamp_a1.device;
> > + ktime_a2 = xtstamp_a2.device;
> > + } else {
> > + ktime_a1 = timespec64_to_ktime(ts_a1);
> > + ktime_a2 = timespec64_to_ktime(ts_a2);
> > + }
> > +
> > + ktime_a = ktime_add(ktime_a1, ktime_a2);
> > +
> > + ts_offs = ktime_divns(ktime_a, 2);
> > +
> > + ts_a1 = ns_to_timespec64(ts_offs);
>
> Converting nanoseconds to timespec64 is rather expensive,
> so I wonder if this could be changed to something cheaper,
> either by returning nanoseconds in the end and consistently
> working on those, or by doing the calculation on the
> timespec64 itself.
>
I prefer returning timespec64, so this system call aligns with other
system calls like clock_gettime for example.
As far as doing the calculation on timespec64 itself, that looks more
expensive to me, but I might be wrong.
Sagi
> Arnd
On Tue, Mar 12, 2024, at 13:15, Sagi Maimon wrote:
> On Tue, Mar 12, 2024 at 1:19 PM Arnd Bergmann <[email protected]> wrote:
>> On Tue, Mar 12, 2024, at 10:50, Sagi Maimon wrote:
>> > + kc_a = clockid_to_kclock(clock_a);
>> > + if (!kc_a) {
>> > + error = -EINVAL;
>> > + return error;
>> > + }
>> > +
>> > + kc_b = clockid_to_kclock(clock_b);
>> > + if (!kc_b) {
>> > + error = -EINVAL;
>> > + return error;
>> > + }
>>
>> I'm not sure if we really need to have it generic enough to
>> support any combination of clocks here. It complicates the
>> implementation a bit but it also generalizes the user space
>> side of it.
>>
>> Can you think of cases where you want to compare against
>> something other than CLOCK_MONOTONIC_RAW or CLOCK_REALTIME,
>> or are these going to be the ones that you expect to
>> be used anyway?
>>
> sure, one example is syncing two different PHCs (which was originally
> why we needed this syscall)
> I hope that I have understood your note and that answers your question.
Right, that is clearly a sensible use case.
I'm still trying to understand the implementation for the case
where you have two different PHCs and both implement
clock_get_crosstimespec(). Rather than averaging between
two snapshots here, I would expect this to result in
something like
ktime_a1 += xtstamp_b.sys_monoraw - xtstamp_a1.sys_monoraw;
in order to get two device timestamps ktime_a1 and ktime_b
that reflect the snapshots as if they were taken
simultaneously. Am I missing some finer detail here,
or is this something you should do?
>> > + if (crosstime_support_a) {
>> > + ktime_a1 = xtstamp_a1.device;
>> > + ktime_a2 = xtstamp_a2.device;
>> > + } else {
>> > + ktime_a1 = timespec64_to_ktime(ts_a1);
>> > + ktime_a2 = timespec64_to_ktime(ts_a2);
>> > + }
>> > +
>> > + ktime_a = ktime_add(ktime_a1, ktime_a2);
>> > +
>> > + ts_offs = ktime_divns(ktime_a, 2);
>> > +
>> > + ts_a1 = ns_to_timespec64(ts_offs);
>>
>> Converting nanoseconds to timespec64 is rather expensive,
>> so I wonder if this could be changed to something cheaper,
>> either by returning nanoseconds in the end and consistently
>> working on those, or by doing the calculation on the
>> timespec64 itself.
>>
> I prefer returning timespec64, so this system call aligns with other
> system calls like clock_gettime for example.
> As far as doing the calculation on timespec64 itself, that looks more
> expensive to me, but I might be wrong.
In the general case, dividing a 64-bit variable by some other
variable is really expensive and will take hundreds of cycles.
This one is a bit cheaper because the division is done using
a constant divider of NS_PER_SEC, which can get optimized fairly
well on many systems by turning it into an equivalent 128-bit
multiplication plus shift.
For the case where you start out with a timespec64, I would
expect it to be cheaper to calculate the nanosecond difference
between ts_a1 and ts_a2 to add half of that to the timespec
than to average two large 64-bit values and convert that back
to a timespec afterwards. This should be fairly easy to try
out if you can test a 32-bit kernel. We could decide that
there is no need to care about anything but 64-bit kernels
here, in which case your current version should be just as
good for both the crosstime_support_a and !crosstime_support_a
cases.
Arnd
On Tue, Mar 12, 2024 at 3:47 PM Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Mar 12, 2024, at 13:15, Sagi Maimon wrote:
> > On Tue, Mar 12, 2024 at 1:19 PM Arnd Bergmann <[email protected]> wrote:
> >> On Tue, Mar 12, 2024, at 10:50, Sagi Maimon wrote:
> >> > + kc_a = clockid_to_kclock(clock_a);
> >> > + if (!kc_a) {
> >> > + error = -EINVAL;
> >> > + return error;
> >> > + }
> >> > +
> >> > + kc_b = clockid_to_kclock(clock_b);
> >> > + if (!kc_b) {
> >> > + error = -EINVAL;
> >> > + return error;
> >> > + }
> >>
> >> I'm not sure if we really need to have it generic enough to
> >> support any combination of clocks here. It complicates the
> >> implementation a bit but it also generalizes the user space
> >> side of it.
> >>
> >> Can you think of cases where you want to compare against
> >> something other than CLOCK_MONOTONIC_RAW or CLOCK_REALTIME,
> >> or are these going to be the ones that you expect to
> >> be used anyway?
> >>
> > sure, one example is syncing two different PHCs (which was originally
> > why we needed this syscall)
> > I hope that I have understood your note and that answers your question.
>
> Right, that is clearly a sensible use case.
>
> I'm still trying to understand the implementation for the case
> where you have two different PHCs and both implement
> clock_get_crosstimespec(). Rather than averaging between
> two snapshots here, I would expect this to result in
> something like
>
> ktime_a1 += xtstamp_b.sys_monoraw - xtstamp_a1.sys_monoraw;
>
> in order to get two device timestamps ktime_a1 and ktime_b
> that reflect the snapshots as if they were taken
> simultaneously. Am I missing some finer detail here,
> or is this something you should do?
>
Since the raw monotonic clock and the PHC are not syntonized, that
won't be accurate at all; the error depends on the frequency delta between
them.
> >> > + if (crosstime_support_a) {
> >> > + ktime_a1 = xtstamp_a1.device;
> >> > + ktime_a2 = xtstamp_a2.device;
> >> > + } else {
> >> > + ktime_a1 = timespec64_to_ktime(ts_a1);
> >> > + ktime_a2 = timespec64_to_ktime(ts_a2);
> >> > + }
> >> > +
> >> > + ktime_a = ktime_add(ktime_a1, ktime_a2);
> >> > +
> >> > + ts_offs = ktime_divns(ktime_a, 2);
> >> > +
> >> > + ts_a1 = ns_to_timespec64(ts_offs);
> >>
> >> Converting nanoseconds to timespec64 is rather expensive,
> >> so I wonder if this could be changed to something cheaper,
> >> either by returning nanoseconds in the end and consistently
> >> working on those, or by doing the calculation on the
> >> timespec64 itself.
> >>
> > I prefer returning timespec64, so this system call aligns with other
> > system calls like clock_gettime for example.
> > As far as doing the calculation on timespec64 itself, that looks more
> > expensive to me, but I might be wrong.
>
> In the general case, dividing a 64-bit variable by some other
> variable is really expensive and will take hundreds of cycles.
> This one is a bit cheaper because the division is done using
> a constant divider of NS_PER_SEC, which can get optimized fairly
> well on many systems by turning it into an equivalent 128-bit
> multiplication plus shift.
>
> For the case where you start out with a timespec64, I would
> expect it to be cheaper to calculate the nanosecond difference
> between ts_a1 and ts_a2 to add half of that to the timespec
> than to average two large 64-bit values and convert that back
> to a timespec afterwards. This should be fairly easy to try
> out if you can test a 32-bit kernel. We could decide that
> there is no need to care about anything but 64-bit kernels
> here, in which case your current version should be just as
> good for both the crosstime_support_a and !crosstime_support_a
> cases.
>
Sounds good, it will be done in the next patch.
> Arnd