Some user space applications need to read some clocks.
Each read requires moving from user space to kernel space.
The syscall overhead causes unpredictable delay between N clocks reads
Removing this delay causes better synchronization between N clocks.
Introduce a new system call multi_clock_gettime, which can be used to measure
the offset between multiple clocks, from variety of types: PHC, virtual PHC
and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
The offset includes the total time that the driver needs to read the clock
timestamp.
New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
Supported clocks IDs: PHC, virtual PHC and various system clocks.
Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
The system call returns n_clocks timestamps for each measurement:
- clock 0 timestamp
- ...
- clock n timestamp
Signed-off-by: Sagi Maimon <[email protected]>
---
Addressed comments from:
- Arnd Bergmann : https://www.spinics.net/lists/netdev/msg962019.html
Changes since version 3:
- Replacing the clkid fixed size arrays with a dynamically sized array.
- Coping only necessary data from user space.
- Calling put_timespec64() only after all time samples taken in order to
reduce the put_timespec64() overhead.
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 2 +-
include/uapi/asm-generic/unistd.h | 4 +-
include/uapi/linux/multi_clock.h | 21 +++++++++
kernel/time/posix-timers.c | 59 ++++++++++++++++++++++++++
5 files changed, 85 insertions(+), 2 deletions(-)
create mode 100644 include/uapi/linux/multi_clock.h
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8cb8bf68721c..9cdeb0bf49db 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -378,6 +378,7 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common multi_clock_gettime sys_multi_clock_gettime
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fd9d12de7e92..b59fa776ab76 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1161,7 +1161,7 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff);
asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
-
+asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
/*
* Not a real system call, but a placeholder for syscalls which are
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 756b013fb832..beb3e0052d3c 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -828,9 +828,11 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake)
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_multi_clock_gettime 457
+__SYSCALL(__NR_multi_clock_gettime, sys_multi_clock_gettime)
#undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 458
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/multi_clock.h b/include/uapi/linux/multi_clock.h
new file mode 100644
index 000000000000..5e78dac3a533
--- /dev/null
+++ b/include/uapi/linux/multi_clock.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_MULTI_CLOCK_H
+#define _UAPI_MULTI_CLOCK_H
+
+#include <linux/types.h>
+#include <linux/time_types.h>
+
+#define MULTI_PTP_MAX_CLOCKS 32 /* Max number of clocks */
+#define MULTI_PTP_MAX_SAMPLES 32 /* Max allowed offset measurement samples. */
+
+struct __ptp_multi_clock_get {
+ unsigned int n_clocks; /* Desired number of clocks. */
+ unsigned int n_samples; /* Desired number of measurements per clock. */
+ clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
+ /*
+ * Array of list of n_clocks clocks time samples n_samples times.
+ */
+ struct __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
+};
+
+#endif /* _UAPI_MULTI_CLOCK_H */
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b924f0f096fa..1d321dc56a25 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -31,6 +31,8 @@
#include <linux/compat.h>
#include <linux/nospec.h>
#include <linux/time_namespace.h>
+#include <linux/multi_clock.h>
+#include <linux/slab.h>
#include "timekeeping.h"
#include "posix-timers.h"
@@ -1426,6 +1428,63 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
#endif
+SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
+{
+ const struct k_clock *kc;
+ struct timespec64 *kernel_tp;
+ struct timespec64 *kernel_tp_base;
+ unsigned int n_clocks; /* Desired number of clocks. */
+ unsigned int n_samples; /* Desired number of measurements per clock. */
+ unsigned int i, j;
+ clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
+ int error = 0;
+
+ if (copy_from_user(&n_clocks, &ptp_multi_clk_get->n_clocks, sizeof(n_clocks)))
+ return -EFAULT;
+ if (copy_from_user(&n_samples, &ptp_multi_clk_get->n_samples, sizeof(n_samples)))
+ return -EFAULT;
+ if (n_samples > MULTI_PTP_MAX_SAMPLES)
+ return -EINVAL;
+ if (n_clocks > MULTI_PTP_MAX_CLOCKS)
+ return -EINVAL;
+ if (copy_from_user(clkid_arr, &ptp_multi_clk_get->clkid_arr,
+ sizeof(clockid_t) * n_clocks))
+ return -EFAULT;
+
+ kernel_tp_base = kmalloc_array(n_clocks * n_samples,
+ sizeof(struct timespec64), GFP_KERNEL);
+ if (!kernel_tp_base)
+ return -ENOMEM;
+
+ kernel_tp = kernel_tp_base;
+ for (j = 0; j < n_samples; j++) {
+ for (i = 0; i < n_clocks; i++) {
+ kc = clockid_to_kclock(clkid_arr[i]);
+ if (!kc) {
+ error = -EINVAL;
+ goto out;
+ }
+ error = kc->clock_get_timespec(clkid_arr[i], kernel_tp++);
+ if (error)
+ goto out;
+ }
+ }
+
+ kernel_tp = kernel_tp_base;
+ for (j = 0; j < n_samples; j++) {
+ for (i = 0; i < n_clocks; i++) {
+ if (put_timespec64(kernel_tp++, (struct __kernel_timespec __user *)
+ &ptp_multi_clk_get->ts[j][i])) {
+ error = -EFAULT;
+ goto out;
+ }
+ }
+ }
+out:
+ kfree(kernel_tp_base);
+ return error;
+}
+
static const struct k_clock clock_realtime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_realtime_timespec,
--
2.26.3
On Sun, Dec 31, 2023 at 9:07 AM Sagi Maimon <[email protected]> wrote:
>
> Some user space applications need to read some clocks.
> Each read requires moving from user space to kernel space.
> The syscall overhead causes unpredictable delay between N clocks reads
> Removing this delay causes better synchronization between N clocks.
>
> Introduce a new system call multi_clock_gettime, which can be used to measure
> the offset between multiple clocks, from variety of types: PHC, virtual PHC
> and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> The offset includes the total time that the driver needs to read the clock
> timestamp.
Knowing this offset sounds quite nice, but...
>
> New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
> Supported clocks IDs: PHC, virtual PHC and various system clocks.
> Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
> The system call returns n_clocks timestamps for each measurement:
> - clock 0 timestamp
> - ...
> - clock n timestamp
Could this instead be arranged to read the actual, exact offset?
> + kernel_tp = kernel_tp_base;
> + for (j = 0; j < n_samples; j++) {
> + for (i = 0; i < n_clocks; i++) {
> + if (put_timespec64(kernel_tp++, (struct __kernel_timespec __user *)
> + &ptp_multi_clk_get->ts[j][i])) {
> + error = -EFAULT;
> + goto out;
> + }
> + }
> + }
There are several pairs of clocks that tick at precisely same rate
(and use the same underlying hardware clock), and the offset could be
computed exactly instead of doing this noisy loop that is merely
somewhat less bad than what user code could do all by itself.
Hi Sagi,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/x86/asm]
[also build test ERROR on arnd-asm-generic/master tip/timers/core linus/master v6.7-rc7]
[cannot apply to next-20231222]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sagi-Maimon/posix-timers-add-multi_clock_gettime-system-call/20240101-011104
base: tip/x86/asm
patch link: https://lore.kernel.org/r/20231231170721.3381-1-maimon.sagi%40gmail.com
patch subject: [PATCH v4] posix-timers: add multi_clock_gettime system call
config: nios2-randconfig-002-20240101 (https://download.01.org/0day-ci/archive/20240101/[email protected]/config)
compiler: nios2-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240101/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
>> nios2-linux-ld: arch/nios2/kernel/syscall_table.o:(.data+0x724): undefined reference to `sys_multi_clock_gettime'
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Sagi,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/x86/asm]
[also build test ERROR on tip/timers/core linus/master v6.7-rc7]
[cannot apply to arnd-asm-generic/master next-20231222]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sagi-Maimon/posix-timers-add-multi_clock_gettime-system-call/20240101-011104
base: tip/x86/asm
patch link: https://lore.kernel.org/r/20231231170721.3381-1-maimon.sagi%40gmail.com
patch subject: [PATCH v4] posix-timers: add multi_clock_gettime system call
config: sparc-allmodconfig (https://download.01.org/0day-ci/archive/20240101/[email protected]/config)
compiler: sparc64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240101/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
In file included from arch/sparc/kernel/setup_64.c:21:
>> include/linux/syscalls.h:1164:48: error: 'struct __ptp_multi_clock_get' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
1164 | asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
| ^~~~~~~~~~~~~~~~~~~~~
arch/sparc/kernel/setup_64.c:602:13: error: no previous prototype for 'alloc_irqstack_bootmem' [-Werror=missing-prototypes]
602 | void __init alloc_irqstack_bootmem(void)
| ^~~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
--
In file included from arch/sparc/kernel/sys_sparc_64.c:25:
>> include/linux/syscalls.h:1164:48: error: 'struct __ptp_multi_clock_get' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
1164 | asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
| ^~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
vim +1164 include/linux/syscalls.h
1154
1155 /* obsolete */
1156 asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second,
1157 unsigned long third, void __user *ptr, long fifth);
1158
1159 /* obsolete */
1160 asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
1161 unsigned long prot, unsigned long flags,
1162 unsigned long fd, unsigned long pgoff);
1163 asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
> 1164 asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
1165
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Sun, Dec 31, 2023 at 11:10 PM Andy Lutomirski <[email protected]> wrote:
>
> On Sun, Dec 31, 2023 at 9:07 AM Sagi Maimon <[email protected]> wrote:
> >
> > Some user space applications need to read some clocks.
> > Each read requires moving from user space to kernel space.
> > The syscall overhead causes unpredictable delay between N clocks reads
> > Removing this delay causes better synchronization between N clocks.
> >
> > Introduce a new system call multi_clock_gettime, which can be used to measure
> > the offset between multiple clocks, from variety of types: PHC, virtual PHC
> > and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> > The offset includes the total time that the driver needs to read the clock
> > timestamp.
Andy Thank you for your notes.
>
> Knowing this offset sounds quite nice, but...
>
> >
> > New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
> > Supported clocks IDs: PHC, virtual PHC and various system clocks.
> > Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
> > The system call returns n_clocks timestamps for each measurement:
> > - clock 0 timestamp
> > - ...
> > - clock n timestamp
>
> Could this instead be arranged to read the actual, exact offset?
>
It can be done, but I prefer to leave it generic and consistent with
other time system calls.
In most cases the offset calculation is done in user space application
> > + kernel_tp = kernel_tp_base;
> > + for (j = 0; j < n_samples; j++) {
> > + for (i = 0; i < n_clocks; i++) {
> > + if (put_timespec64(kernel_tp++, (struct __kernel_timespec __user *)
> > + &ptp_multi_clk_get->ts[j][i])) {
> > + error = -EFAULT;
> > + goto out;
> > + }
> > + }
> > + }
>
> There are several pairs of clocks that tick at precisely same rate
> (and use the same underlying hardware clock), and the offset could be
> computed exactly instead of doing this noisy loop that is merely
> somewhat less bad than what user code could do all by itself.
You are correct, there are some PHCs on the same NIC (each per port)
that share the same HW counter/clock.
In that case it is slightly better to do the offset calculation in the
NIC driver code,
but that requires changes in each NIC driver's code.
The main thing is that the multi_clock_gettime system call is a
generic solution,
it covers that case among other cases, for example sync between two
PHCs on different NICs.