2019-06-21 09:53:49

by Vincenzo Frascino

[permalink] [raw]
Subject: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

To take advantage of the commonly defined vdso interface for
gettimeofday, the architectural code requires an adaptation.

Re-implement the gettimeofday vdso in C in order to use lib/vdso.

With the new implementation arm64 gains support for CLOCK_BOOTTIME
and CLOCK_TAI.

Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Signed-off-by: Vincenzo Frascino <[email protected]>
Tested-by: Shijith Thotton <[email protected]>
Tested-by: Andre Przywara <[email protected]>
---
arch/arm64/Kconfig | 2 +
arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++
arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++
arch/arm64/include/asm/vdso_datapage.h | 48 ---
arch/arm64/kernel/asm-offsets.c | 33 +-
arch/arm64/kernel/vdso.c | 51 +---
arch/arm64/kernel/vdso/Makefile | 34 ++-
arch/arm64/kernel/vdso/gettimeofday.S | 334 ---------------------
arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++
9 files changed, 223 insertions(+), 446 deletions(-)
create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h
create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h
delete mode 100644 arch/arm64/include/asm/vdso_datapage.h
delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S
create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea0510729..952c9f8cf3b8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -107,6 +107,7 @@ config ARM64
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@@ -160,6 +161,7 @@ config ARM64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..bc3cb6738051
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline int gettimeofday_fallback(
+ struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("x1") = _tz;
+ register struct __kernel_old_timeval *tv asm("x0") = _tv;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_gettimeofday;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline long clock_gettime_fallback(
+ clockid_t _clkid,
+ struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_gettime;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline int clock_getres_fallback(
+ clockid_t _clkid,
+ struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_getres;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+
+ return res;
+}
+
+static __always_inline
+const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0c731bfc7c8c
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arm64_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
+
+static __always_inline
+int __arm64_get_clock_mode(struct timekeeper *tk)
+{
+ u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
+
+ return use_syscall;
+}
+#define __arch_get_clock_mode __arm64_get_clock_mode
+
+static __always_inline
+int __arm64_use_vsyscall(struct vdso_data *vdata)
+{
+ return !vdata[CS_HRES_COARSE].clock_mode;
+}
+#define __arch_use_vsyscall __arm64_use_vsyscall
+
+static __always_inline
+void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
+{
+ vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
+ vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
+}
+#define __arch_update_vsyscall __arm64_update_vsyscall
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
deleted file mode 100644
index f89263c8e11a..000000000000
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-struct vdso_data {
- __u64 cs_cycle_last; /* Timebase at clocksource init */
- __u64 raw_time_sec; /* Raw time */
- __u64 raw_time_nsec;
- __u64 xtime_clock_sec; /* Kernel time */
- __u64 xtime_clock_nsec;
- __u64 xtime_coarse_sec; /* Coarse time */
- __u64 xtime_coarse_nsec;
- __u64 wtm_clock_sec; /* Wall to monotonic time */
- __u64 wtm_clock_nsec;
- __u32 tb_seq_count; /* Timebase sequence counter */
- /* cs_* members must be adjacent and in this order (ldp accesses) */
- __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
- __u32 cs_shift; /* Clocksource shift (mono = raw) */
- __u32 cs_raw_mult; /* Raw clocksource multiplier */
- __u32 tz_minuteswest; /* Whacky timezone stuff */
- __u32 tz_dsttime;
- __u32 use_syscall;
- __u32 hrtimer_res;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 947e39896e28..9e4b7ccbab2f 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -25,13 +25,13 @@
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/suspend.h>
+#include <vdso/datapage.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
-#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
#include <linux/arm-smccc.h>

@@ -100,17 +100,28 @@ int main(void)
DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
BLANK();
- DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
- DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
- DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
- DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
- DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
+ DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
+ DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
+ DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
+ DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
+ DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
+ DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
+ DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
+ DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
+ DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
+ DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
+ DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
+ DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
+ DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
+ DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
+ DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
+ DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
+ DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
+ DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
+ DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
+ DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
+ DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
BLANK();
DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 8074cbd3a3a8..23c38303a52a 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -31,11 +31,13 @@
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>

extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
@@ -44,10 +46,10 @@ static unsigned long vdso_pages __ro_after_init;
* The vDSO data page.
*/
static union {
- struct vdso_data data;
+ struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;

#ifdef CONFIG_COMPAT
/*
@@ -280,46 +282,3 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
up_write(&mm->mmap_sem);
return PTR_ERR(ret);
}
-
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
- u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
- ++vdso_data->tb_seq_count;
- smp_wmb();
-
- vdso_data->use_syscall = use_syscall;
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
- tk->tkr_mono.shift;
- vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-
- /* Read without the seqlock held by clock_getres() */
- WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
-
- if (!use_syscall) {
- /* tkr_mono.cycle_last == tkr_raw.cycle_last */
- vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_sec;
- vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mono_mult = tk->tkr_mono.mult;
- vdso_data->cs_raw_mult = tk->tkr_raw.mult;
- /* tkr_mono.shift == tkr_raw.shift */
- vdso_data->cs_shift = tk->tkr_mono.shift;
- }
-
- smp_wmb();
- ++vdso_data->tb_seq_count;
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index fa230ff09aa1..3acfc813e966 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -6,7 +6,12 @@
# Heavily based on the vDSO Makefiles for other archs.
#

-obj-vdso := gettimeofday.o note.o sigreturn.o
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso := vgettimeofday.o note.o sigreturn.o

# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
--build-id -n -T

+ccflags-y := -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Bsymbolic
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+KCOV_INSTRUMENT := n
+
+ifeq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+else
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n

@@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,ld)
+ $(call if_changed,vdso_check)

# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)

-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
- $(call if_changed_dep,vdsoas)
-
# Actual build commands
-quiet_cmd_vdsoas = VDSOA $@
- cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOCC $@
+ cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<

# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
deleted file mode 100644
index 856fee6d3512..000000000000
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Will Deacon <[email protected]>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16 0xca00
-#define NSEC_PER_SEC_HI16 0x3b9a
-
-vdso_data .req x6
-seqcnt .req w7
-w_tmp .req w8
-x_tmp .req x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
- .macro seqcnt_acquire
-9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
- tbnz seqcnt, #0, 9999b
- dmb ishld
- .endm
-
- .macro seqcnt_check fail
- dmb ishld
- ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
- cmp w_tmp, seqcnt
- b.ne \fail
- .endm
-
- .macro syscall_check fail
- ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
- cbnz w_tmp, \fail
- .endm
-
- .macro get_nsec_per_sec res
- mov \res, #NSEC_PER_SEC_LO16
- movk \res, #NSEC_PER_SEC_HI16, lsl #16
- .endm
-
- /*
- * Returns the clock delta, in nanoseconds left-shifted by the clock
- * shift.
- */
- .macro get_clock_shifted_nsec res, cycle_last, mult
- /* Read the virtual counter. */
- isb
- mrs x_tmp, cntvct_el0
- /* Calculate cycle delta and convert to ns. */
- sub \res, x_tmp, \cycle_last
- /* We can only guarantee 56 bits of precision. */
- movn x_tmp, #0xff00, lsl #48
- and \res, x_tmp, \res
- mul \res, \res, \mult
- /*
- * Fake address dependency from the value computed from the counter
- * register to subsequent data page accesses so that the sequence
- * locking also orders the read of the counter.
- */
- and x_tmp, \res, xzr
- add vdso_data, vdso_data, x_tmp
- .endm
-
- /*
- * Returns in res_{sec,nsec} the REALTIME timespec, based on the
- * "wall time" (xtime) and the clock_mono delta.
- */
- .macro get_ts_realtime res_sec, res_nsec, \
- clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
- add \res_nsec, \clock_nsec, \xtime_nsec
- udiv x_tmp, \res_nsec, \nsec_to_sec
- add \res_sec, \xtime_sec, x_tmp
- msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
- .endm
-
- /*
- * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
- * used for CLOCK_MONOTONIC_RAW.
- */
- .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
- udiv \res_sec, \clock_nsec, \nsec_to_sec
- msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
- .endm
-
- /* sec and nsec are modified in place. */
- .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
- /* Add timespec. */
- add \sec, \sec, \ts_sec
- add \nsec, \nsec, \ts_nsec
-
- /* Normalise the new timespec. */
- cmp \nsec, \nsec_to_sec
- b.lt 9999f
- sub \nsec, \nsec, \nsec_to_sec
- add \sec, \sec, #1
-9999:
- cmp \nsec, #0
- b.ge 9998f
- add \nsec, \nsec, \nsec_to_sec
- sub \sec, \sec, #1
-9998:
- .endm
-
- .macro clock_gettime_return, shift=0
- .if \shift == 1
- lsr x11, x11, x12
- .endif
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
- .endm
-
- .macro jump_slot jumptable, index, label
- .if (. - \jumptable) != 4 * (\index)
- .error "Jump slot index mismatch"
- .endif
- b \label
- .endm
-
- .text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
- .cfi_startproc
- adr vdso_data, _vdso_data
- /* If tv is NULL, skip to the timezone code. */
- cbz x0, 2f
-
- /* Compute the time of day. */
-1: seqcnt_acquire
- syscall_check fail=4f
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=1b
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- /* Convert ns to us. */
- mov x13, #1000
- lsl x13, x13, x12
- udiv x11, x11, x13
- stp x10, x11, [x0, #TVAL_TV_SEC]
-2:
- /* If tz is NULL, return 0. */
- cbz x1, 3f
- ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
- stp w4, w5, [x1, #TZ_MINWEST]
-3:
- mov x0, xzr
- ret
-4:
- /* Syscall fallback. */
- mov x8, #__NR_gettimeofday
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
- .cfi_startproc
- cmp w0, #JUMPSLOT_MAX
- b.hi syscall
- adr vdso_data, _vdso_data
- adr x_tmp, jumptable
- add x_tmp, x_tmp, w0, uxtw #2
- br x_tmp
-
- ALIGN
-jumptable:
- jump_slot jumptable, CLOCK_REALTIME, realtime
- jump_slot jumptable, CLOCK_MONOTONIC, monotonic
- b syscall
- b syscall
- jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
- jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
- jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
- .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
- .error "Wrong jumptable size"
- .endif
-
- ALIGN
-realtime:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=realtime
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- lsl x4, x4, x12
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic_raw:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_raw_mult, w12 = cs_shift */
- ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
- ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic_raw
- get_ts_clock_raw res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-realtime_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- seqcnt_check fail=realtime_coarse
- clock_gettime_return
-
- ALIGN
-monotonic_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic_coarse
-
- /* Computations are done in (non-shifted) nsecs. */
- get_nsec_per_sec res=x9
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return
-
- ALIGN
-syscall: /* Syscall fallback. */
- mov x8, #__NR_clock_gettime
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
- .cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
- b.ne 1f
-
- adr vdso_data, _vdso_data
- ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
- b 2f
-1:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 4f
- ldr x2, 5f
-2:
- cbz x1, 3f
- stp xzr, x2, [x1]
-
-3: /* res == NULL. */
- mov w0, wzr
- ret
-
-4: /* Syscall fallback. */
- mov x8, #__NR_clock_getres
- svc #0
- ret
-5:
- .quad CLOCK_COARSE_RES
- .cfi_endproc
-ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..3c58f19dbdf4
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __kernel_clock_getres(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
+
--
2.21.0


Subject: [tip:timers/vdso] arm64: vdso: Substitute gettimeofday() with C implementation

Commit-ID: 28b1a824a4f44da46983cd2c3249f910bd4b797b
Gitweb: https://git.kernel.org/tip/28b1a824a4f44da46983cd2c3249f910bd4b797b
Author: Vincenzo Frascino <[email protected]>
AuthorDate: Fri, 21 Jun 2019 10:52:31 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sat, 22 Jun 2019 21:21:06 +0200

arm64: vdso: Substitute gettimeofday() with C implementation

To take advantage of the commonly defined vdso interface for gettimeofday(),
the architectural code requires an adaptation.

Re-implement the gettimeofday VDSO in C in order to use lib/vdso.

With the new implementation arm64 gains support for CLOCK_BOOTTIME
and CLOCK_TAI.

[ tglx: Reformatted the function line breaks ]

Signed-off-by: Vincenzo Frascino <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Shijith Thotton <[email protected]>
Tested-by: Andre Przywara <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Russell King <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Paul Burton <[email protected]>
Cc: Daniel Lezcano <[email protected]>
Cc: Mark Salyzyn <[email protected]>
Cc: Peter Collingbourne <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Dmitry Safonov <[email protected]>
Cc: Rasmus Villemoes <[email protected]>
Cc: Huw Davies <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/arm64/Kconfig | 2 +
arch/arm64/include/asm/vdso/gettimeofday.h | 84 ++++++++
arch/arm64/include/asm/vdso/vsyscall.h | 53 +++++
arch/arm64/kernel/asm-offsets.c | 33 ++-
arch/arm64/kernel/vdso.c | 51 +----
arch/arm64/kernel/vdso/Makefile | 34 ++-
arch/arm64/kernel/vdso/gettimeofday.S | 323 -----------------------------
arch/arm64/kernel/vdso/vgettimeofday.c | 27 +++
8 files changed, 220 insertions(+), 387 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea0510729..952c9f8cf3b8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -107,6 +107,7 @@ config ARM64
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@@ -160,6 +161,7 @@ config ARM64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..447ef417de45
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("x1") = _tz;
+ register struct __kernel_old_timeval *tv asm("x0") = _tv;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_gettimeofday;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_gettime;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_getres;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+
+ return res;
+}
+
+static __always_inline
+const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0c731bfc7c8c
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arm64_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
+
+static __always_inline
+int __arm64_get_clock_mode(struct timekeeper *tk)
+{
+ u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
+
+ return use_syscall;
+}
+#define __arch_get_clock_mode __arm64_get_clock_mode
+
+static __always_inline
+int __arm64_use_vsyscall(struct vdso_data *vdata)
+{
+ return !vdata[CS_HRES_COARSE].clock_mode;
+}
+#define __arch_use_vsyscall __arm64_use_vsyscall
+
+static __always_inline
+void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
+{
+ vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
+ vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
+}
+#define __arch_update_vsyscall __arm64_update_vsyscall
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 02f08768c298..14c99b7a0c0e 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -14,13 +14,13 @@
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/suspend.h>
+#include <vdso/datapage.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
-#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
#include <linux/arm-smccc.h>

@@ -89,17 +89,28 @@ int main(void)
DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
BLANK();
- DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
- DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
- DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
- DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
- DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
+ DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
+ DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
+ DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
+ DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
+ DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
+ DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
+ DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
+ DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
+ DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
+ DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
+ DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
+ DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
+ DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
+ DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
+ DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
+ DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
+ DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
+ DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
+ DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
+ DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
+ DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
BLANK();
DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 663b166241d0..478ec865a413 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -20,11 +20,13 @@
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>

extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
@@ -33,10 +35,10 @@ static unsigned long vdso_pages __ro_after_init;
* The vDSO data page.
*/
static union {
- struct vdso_data data;
+ struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;

#ifdef CONFIG_COMPAT
/*
@@ -269,46 +271,3 @@ up_fail:
up_write(&mm->mmap_sem);
return PTR_ERR(ret);
}
-
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
- u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
- ++vdso_data->tb_seq_count;
- smp_wmb();
-
- vdso_data->use_syscall = use_syscall;
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
- tk->tkr_mono.shift;
- vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-
- /* Read without the seqlock held by clock_getres() */
- WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
-
- if (!use_syscall) {
- /* tkr_mono.cycle_last == tkr_raw.cycle_last */
- vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_sec;
- vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mono_mult = tk->tkr_mono.mult;
- vdso_data->cs_raw_mult = tk->tkr_raw.mult;
- /* tkr_mono.shift == tkr_raw.shift */
- vdso_data->cs_shift = tk->tkr_mono.shift;
- }
-
- smp_wmb();
- ++vdso_data->tb_seq_count;
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index fa230ff09aa1..3acfc813e966 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -6,7 +6,12 @@
# Heavily based on the vDSO Makefiles for other archs.
#

-obj-vdso := gettimeofday.o note.o sigreturn.o
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso := vgettimeofday.o note.o sigreturn.o

# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
--build-id -n -T

+ccflags-y := -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Bsymbolic
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+KCOV_INSTRUMENT := n
+
+ifeq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+else
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n

@@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,ld)
+ $(call if_changed,vdso_check)

# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)

-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
- $(call if_changed_dep,vdsoas)
-
# Actual build commands
-quiet_cmd_vdsoas = VDSOA $@
- cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOCC $@
+ cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<

# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 80f780f56e0d..e69de29bb2d1 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -1,323 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <[email protected]>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16 0xca00
-#define NSEC_PER_SEC_HI16 0x3b9a
-
-vdso_data .req x6
-seqcnt .req w7
-w_tmp .req w8
-x_tmp .req x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
- .macro seqcnt_acquire
-9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
- tbnz seqcnt, #0, 9999b
- dmb ishld
- .endm
-
- .macro seqcnt_check fail
- dmb ishld
- ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
- cmp w_tmp, seqcnt
- b.ne \fail
- .endm
-
- .macro syscall_check fail
- ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
- cbnz w_tmp, \fail
- .endm
-
- .macro get_nsec_per_sec res
- mov \res, #NSEC_PER_SEC_LO16
- movk \res, #NSEC_PER_SEC_HI16, lsl #16
- .endm
-
- /*
- * Returns the clock delta, in nanoseconds left-shifted by the clock
- * shift.
- */
- .macro get_clock_shifted_nsec res, cycle_last, mult
- /* Read the virtual counter. */
- isb
- mrs x_tmp, cntvct_el0
- /* Calculate cycle delta and convert to ns. */
- sub \res, x_tmp, \cycle_last
- /* We can only guarantee 56 bits of precision. */
- movn x_tmp, #0xff00, lsl #48
- and \res, x_tmp, \res
- mul \res, \res, \mult
- /*
- * Fake address dependency from the value computed from the counter
- * register to subsequent data page accesses so that the sequence
- * locking also orders the read of the counter.
- */
- and x_tmp, \res, xzr
- add vdso_data, vdso_data, x_tmp
- .endm
-
- /*
- * Returns in res_{sec,nsec} the REALTIME timespec, based on the
- * "wall time" (xtime) and the clock_mono delta.
- */
- .macro get_ts_realtime res_sec, res_nsec, \
- clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
- add \res_nsec, \clock_nsec, \xtime_nsec
- udiv x_tmp, \res_nsec, \nsec_to_sec
- add \res_sec, \xtime_sec, x_tmp
- msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
- .endm
-
- /*
- * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
- * used for CLOCK_MONOTONIC_RAW.
- */
- .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
- udiv \res_sec, \clock_nsec, \nsec_to_sec
- msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
- .endm
-
- /* sec and nsec are modified in place. */
- .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
- /* Add timespec. */
- add \sec, \sec, \ts_sec
- add \nsec, \nsec, \ts_nsec
-
- /* Normalise the new timespec. */
- cmp \nsec, \nsec_to_sec
- b.lt 9999f
- sub \nsec, \nsec, \nsec_to_sec
- add \sec, \sec, #1
-9999:
- cmp \nsec, #0
- b.ge 9998f
- add \nsec, \nsec, \nsec_to_sec
- sub \sec, \sec, #1
-9998:
- .endm
-
- .macro clock_gettime_return, shift=0
- .if \shift == 1
- lsr x11, x11, x12
- .endif
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
- .endm
-
- .macro jump_slot jumptable, index, label
- .if (. - \jumptable) != 4 * (\index)
- .error "Jump slot index mismatch"
- .endif
- b \label
- .endm
-
- .text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
- .cfi_startproc
- adr vdso_data, _vdso_data
- /* If tv is NULL, skip to the timezone code. */
- cbz x0, 2f
-
- /* Compute the time of day. */
-1: seqcnt_acquire
- syscall_check fail=4f
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=1b
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- /* Convert ns to us. */
- mov x13, #1000
- lsl x13, x13, x12
- udiv x11, x11, x13
- stp x10, x11, [x0, #TVAL_TV_SEC]
-2:
- /* If tz is NULL, return 0. */
- cbz x1, 3f
- ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
- stp w4, w5, [x1, #TZ_MINWEST]
-3:
- mov x0, xzr
- ret
-4:
- /* Syscall fallback. */
- mov x8, #__NR_gettimeofday
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
- .cfi_startproc
- cmp w0, #JUMPSLOT_MAX
- b.hi syscall
- adr vdso_data, _vdso_data
- adr x_tmp, jumptable
- add x_tmp, x_tmp, w0, uxtw #2
- br x_tmp
-
- ALIGN
-jumptable:
- jump_slot jumptable, CLOCK_REALTIME, realtime
- jump_slot jumptable, CLOCK_MONOTONIC, monotonic
- b syscall
- b syscall
- jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
- jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
- jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
- .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
- .error "Wrong jumptable size"
- .endif
-
- ALIGN
-realtime:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=realtime
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- lsl x4, x4, x12
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic_raw:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_raw_mult, w12 = cs_shift */
- ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
- ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic_raw
- get_ts_clock_raw res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-realtime_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- seqcnt_check fail=realtime_coarse
- clock_gettime_return
-
- ALIGN
-monotonic_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic_coarse
-
- /* Computations are done in (non-shifted) nsecs. */
- get_nsec_per_sec res=x9
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return
-
- ALIGN
-syscall: /* Syscall fallback. */
- mov x8, #__NR_clock_gettime
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
- .cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
- b.ne 1f
-
- adr vdso_data, _vdso_data
- ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
- b 2f
-1:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 4f
- ldr x2, 5f
-2:
- cbz x1, 3f
- stp xzr, x2, [x1]
-
-3: /* res == NULL. */
- mov w0, wzr
- ret
-
-4: /* Syscall fallback. */
- mov x8, #__NR_clock_getres
- svc #0
- ret
-5:
- .quad CLOCK_COARSE_RES
- .cfi_endproc
-ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..747635501a14
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __kernel_clock_getres(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}

2019-06-24 14:15:50

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Vincenzo,

On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
> To take advantage of the commonly defined vdso interface for
> gettimeofday the architectural code requires an adaptation.
>
> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>
> With the new implementation arm64 gains support for CLOCK_BOOTTIME
> and CLOCK_TAI.
>
> Cc: Catalin Marinas <[email protected]>
> Cc: Will Deacon <[email protected]>
> Signed-off-by: Vincenzo Frascino <[email protected]>
> Tested-by: Shijith Thotton <[email protected]>
> Tested-by: Andre Przywara <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +
> arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++
> arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++
> arch/arm64/include/asm/vdso_datapage.h | 48 ---
> arch/arm64/kernel/asm-offsets.c | 33 +-
> arch/arm64/kernel/vdso.c | 51 +---
> arch/arm64/kernel/vdso/Makefile | 34 ++-
> arch/arm64/kernel/vdso/gettimeofday.S | 334 ---------------------
> arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++

I'm concerned about an apparent semantic change introduced by your patch:

> +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
> +{
> + u64 res;
> +
> + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
> +
> + return res;
> +}

vs:

> - .macro get_clock_shifted_nsec res, cycle_last, mult
> - /* Read the virtual counter. */
> - isb
> - mrs x_tmp, cntvct_el0
> - /* Calculate cycle delta and convert to ns. */
> - sub \res, x_tmp, \cycle_last
> - /* We can only guarantee 56 bits of precision. */
> - movn x_tmp, #0xff00, lsl #48
> - and \res, x_tmp, \res
> - mul \res, \res, \mult
> - /*
> - * Fake address dependency from the value computed from the counter
> - * register to subsequent data page accesses so that the sequence
> - * locking also orders the read of the counter.
> - */
> - and x_tmp, \res, xzr
> - add vdso_data, vdso_data, x_tmp
> - .endm

It looks like you're dropping both the preceding ISB (allowing the counter
value to be speculated) and also the subsequent dependency (allowing the
seq lock to be speculated). If I've missed them, apologies, but I couldn't
spot them elsewhere in this patch.

__arch_get_hw_counter should probably be identical to __arch_counter_get_cntvct
to avoid these problems. I guess we don't need to care about the case where
the counter is unstable, since we'll just disable the vDSO altogether on
such systems?

Will

2019-06-24 14:21:29

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 947e39896e28..9e4b7ccbab2f 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -25,13 +25,13 @@
> #include <linux/kvm_host.h>
> #include <linux/preempt.h>
> #include <linux/suspend.h>
> +#include <vdso/datapage.h>
> #include <asm/cpufeature.h>
> #include <asm/fixmap.h>
> #include <asm/thread_info.h>
> #include <asm/memory.h>
> #include <asm/smp_plat.h>
> #include <asm/suspend.h>
> -#include <asm/vdso_datapage.h>
> #include <linux/kbuild.h>
> #include <linux/arm-smccc.h>
>
> @@ -100,17 +100,28 @@ int main(void)
> DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
> DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
> BLANK();
> - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
> - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
> - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
> - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
> - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
> - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
> - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
> - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
> - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
> + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
> + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
> + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
> + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
> + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
> + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
> + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
> + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
> + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
> + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
> + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
> + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
> + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
> + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
> + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
> + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
> + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
> + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
> + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
> + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
> DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
> - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
> + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
> BLANK();
> DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
> DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));

Now that we are moving this to C, do we actually need the asm-offsets?
If not, here's a clean-up patch:

---------------8<--------------------------------------
From 7e818178a8b225b522fe547cf00ba8508d4cdcf0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <[email protected]>
Date: Mon, 24 Jun 2019 14:12:48 +0100
Subject: [PATCH] arm64: vdso: Remove unnecessary asm-offsets.c definitions

Since the VDSO code is moving to C from assembly, there is no need to
define and maintain the corresponding asm offsets.

Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C implementation")
Signed-off-by: Catalin Marinas <[email protected]>
---
arch/arm64/kernel/asm-offsets.c | 39 ---------------------------------
1 file changed, 39 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e6f7409a78a4..214685760e1c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -14,7 +14,6 @@
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/suspend.h>
-#include <vdso/datapage.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
@@ -86,44 +85,6 @@ int main(void)
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
- DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
- DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
- DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
- DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
- DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
- DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
- DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
- DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
- DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
- DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
- DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
- DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
- DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
- DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
- DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
- DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
- DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
- DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
- DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();

2019-06-24 14:22:30

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation



On 24/06/2019 14:36, Will Deacon wrote:
> Hi Vincenzo,
>
> On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
>> To take advantage of the commonly defined vdso interface for
>> gettimeofday the architectural code requires an adaptation.
>>
>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>>
>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
>> and CLOCK_TAI.
>>
>> Cc: Catalin Marinas <[email protected]>
>> Cc: Will Deacon <[email protected]>
>> Signed-off-by: Vincenzo Frascino <[email protected]>
>> Tested-by: Shijith Thotton <[email protected]>
>> Tested-by: Andre Przywara <[email protected]>
>> ---
>> arch/arm64/Kconfig | 2 +
>> arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++
>> arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++
>> arch/arm64/include/asm/vdso_datapage.h | 48 ---
>> arch/arm64/kernel/asm-offsets.c | 33 +-
>> arch/arm64/kernel/vdso.c | 51 +---
>> arch/arm64/kernel/vdso/Makefile | 34 ++-
>> arch/arm64/kernel/vdso/gettimeofday.S | 334 ---------------------
>> arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++
>
> I'm concerned about an apparent semantic change introduced by your patch:
>
>> +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
>> +{
>> + u64 res;
>> +
>> + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
>> +
>> + return res;
>> +}
>
> vs:
>
>> - .macro get_clock_shifted_nsec res, cycle_last, mult
>> - /* Read the virtual counter. */
>> - isb
>> - mrs x_tmp, cntvct_el0
>> - /* Calculate cycle delta and convert to ns. */
>> - sub \res, x_tmp, \cycle_last
>> - /* We can only guarantee 56 bits of precision. */
>> - movn x_tmp, #0xff00, lsl #48
>> - and \res, x_tmp, \res
>> - mul \res, \res, \mult
>> - /*
>> - * Fake address dependency from the value computed from the counter
>> - * register to subsequent data page accesses so that the sequence
>> - * locking also orders the read of the counter.
>> - */
>> - and x_tmp, \res, xzr
>> - add vdso_data, vdso_data, x_tmp
>> - .endm
>
> It looks like you're dropping both the preceding ISB (allowing the counter
> value to be speculated) and also the subsequent dependency (allowing the
> seq lock to be speculated). If I've missed them, apologies, but I couldn't
> spot them elsewhere in this patch.
>
> __arch_get_hw_counter should probably be identical to __arch_counter_get_cntvct
> to avoid these problems. I guess we don't need to care about the case where
> the counter is unstable, since we'll just disable the vDSO altogether on
> such systems?
>

Oops, I forgot to mirror your patch that introduces this change. I will post a
fix in reply to this email.

> Will
>

--
Regards,
Vincenzo

Subject: [tip:timers/vdso] arm64: vdso: Remove unnecessary asm-offsets.c definitions

Commit-ID: b4b12aca00d509a233abd28990194628adcd71e6
Gitweb: https://git.kernel.org/tip/b4b12aca00d509a233abd28990194628adcd71e6
Author: Catalin Marinas <[email protected]>
AuthorDate: Mon, 24 Jun 2019 14:58:12 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Tue, 25 Jun 2019 09:43:38 +0200

arm64: vdso: Remove unnecessary asm-offsets.c definitions

Since the VDSO code has moved to C from assembly, there is no need to
define and maintain the corresponding asm offsets.

Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C implementation")
Signed-off-by: Catalin Marinas <[email protected]>
Cc: Vincenzo Frascino <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Will Deacon <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Russell King <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Paul Burton <[email protected]>
Cc: Daniel Lezcano <[email protected]>
Cc: Mark Salyzyn <[email protected]>
Cc: Peter Collingbourne <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Dmitry Safonov <[email protected]>
Cc: Rasmus Villemoes <[email protected]>
Cc: Huw Davies <[email protected]>
Cc: Shijith Thotton <[email protected]>
Cc: Andre Przywara <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/arm64/kernel/asm-offsets.c | 39 ---------------------------------------
1 file changed, 39 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e6f7409a78a4..214685760e1c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -14,7 +14,6 @@
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/suspend.h>
-#include <vdso/datapage.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
@@ -86,44 +85,6 @@ int main(void)
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
- DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
- DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
- DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
- DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
- DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
- DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
- DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
- DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
- DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
- DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
- DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
- DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
- DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
- DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
- DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
- DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
- DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
- DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
- DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();

2019-06-25 19:17:24

by Dave Martin

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
> To take advantage of the commonly defined vdso interface for
> gettimeofday the architectural code requires an adaptation.
>
> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>
> With the new implementation arm64 gains support for CLOCK_BOOTTIME
> and CLOCK_TAI.
>
> Cc: Catalin Marinas <[email protected]>
> Cc: Will Deacon <[email protected]>
> Signed-off-by: Vincenzo Frascino <[email protected]>
> Tested-by: Shijith Thotton <[email protected]>
> Tested-by: Andre Przywara <[email protected]>

[...]

> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
> new file mode 100644
> index 000000000000..bc3cb6738051
> --- /dev/null
> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
> @@ -0,0 +1,86 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2018 ARM Limited
> + */
> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
> +#define __ASM_VDSO_GETTIMEOFDAY_H
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <asm/unistd.h>
> +#include <uapi/linux/time.h>
> +
> +#define VDSO_HAS_CLOCK_GETRES 1
> +
> +static __always_inline int gettimeofday_fallback(
> + struct __kernel_old_timeval *_tv,
> + struct timezone *_tz)

Out of interest, does this need to be __always_inline?

> +{
> + register struct timezone *tz asm("x1") = _tz;
> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
> + register long ret asm ("x0");
> + register long nr asm("x8") = __NR_gettimeofday;
> +
> + asm volatile(
> + " svc #0\n"

Can inlining of this function result in non-trivial expressions being
substituted for _tz or _tv?

A function call can clobber register asm vars that are assigned to the
caller-save registers or that the PCS uses for function arguments, and
the situations where this can happen are poorly defined AFAICT. There's
also no reliable way to detect at build time whether the compiler has
done this, and no robust way to stop if happening.

(IMHO the compiler is wrong to do this, but it's been that way for ever,
and I think I saw GCC 9 show this behaviour recently when I was
investigating something related.)


To be safe, it's better to put this out of line, or remove the reg asm()
specifiers, mark x0-x18 and lr as clobbered here (so that the compiler
doesn't map arguments to them), and put movs in the asm to move things
into the right registers. The syscall number can be passed with an "i"
constraint. (And yes, this sucks.)

If the code this is inlined in is simple enough though, we can be fairly
confident of getting away with it.

[...]

Cheers
---Dave

2019-06-25 19:21:52

by Vincenzo Frascino

[permalink] [raw]
Subject: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

do_hres() in the vDSO generic library masks the hw counter value
immediately after reading it.

Postpone the mask application after checking if the syscall fallback is
enabled, in order to be able to detect a possible fallback for the
architectures that have masks smaller than ULLONG_MAX.

Signed-off-by: Vincenzo Frascino <[email protected]>
---
lib/vdso/gettimeofday.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index ef28cc5d7bff..ee1221ba1d32 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -35,12 +35,12 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,

do {
seq = vdso_read_begin(vd);
- cycles = __arch_get_hw_counter(vd->clock_mode) &
- vd->mask;
+ cycles = __arch_get_hw_counter(vd->clock_mode);
ns = vdso_ts->nsec;
last = vd->cycle_last;
if (unlikely((s64)cycles < 0))
return clock_gettime_fallback(clk, ts);
+ cycles &= vd->mask;
if (cycles > last)
ns += (cycles - last) * vd->mult;
ns >>= vd->shift;
--
2.22.0

2019-06-25 19:23:14

by Vincenzo Frascino

[permalink] [raw]
Subject: [PATCH 2/3] arm64: Fix __arch_get_hw_counter() implementation

Provide the following fixes for the __arch_get_hw_counter()
implementation on arm64:
- Fallback on syscall when an unstable counter is detected.
- Introduce isb()s before and after the counter read to avoid
speculation of the counter value and of the seq lock
respectively.
The second isb() is a temporary solution that will be revisited
in 5.3-rc1.

These fixes restore the semantics that __arch_counter_get_cntvct()
had on arm64.

Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Signed-off-by: Vincenzo Frascino <[email protected]>
---
arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 447ef417de45..b08f476b72b4 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -10,6 +10,8 @@
#include <asm/unistd.h>
#include <uapi/linux/time.h>

+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
#define VDSO_HAS_CLOCK_GETRES 1

static __always_inline
@@ -68,7 +70,24 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
{
u64 res;

+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();

return res;
}
--
2.22.0

2019-06-25 19:23:23

by Vincenzo Frascino

[permalink] [raw]
Subject: [PATCH 3/3] arm64: compat: Fix __arch_get_hw_counter() implementation

Provide the following fixes for the __arch_get_hw_counter()
implementation on arm64:
- Fallback on syscall when an unstable counter is detected.
- Introduce isb()s before and after the counter read to avoid
speculation of the counter value and of the seq lock
respectively.
The second isb() is a temporary solution that will be revisited
in 5.3-rc1.

These fixes restore the semantics that __arch_counter_get_cntvct()
had on arm64.

Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Signed-off-by: Vincenzo Frascino <[email protected]>
---
.../include/asm/vdso/compat_gettimeofday.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index 93dbd935b66d..f4812777f5c5 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -12,6 +12,8 @@

#include <asm/vdso/compat_barrier.h>

+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
#define VDSO_HAS_CLOCK_GETRES 1

static __always_inline
@@ -74,8 +76,24 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
{
u64 res;

+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
isb();
asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();

return res;
}
--
2.22.0

2019-06-25 19:44:05

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

On Tue, 25 Jun 2019, Vincenzo Frascino wrote:

CC+ Andy

> do_hres() in the vDSO generic library masks the hw counter value
> immediately after reading it.
>
> Postpone the mask application after checking if the syscall fallback is
> enabled, in order to be able to detect a possible fallback for the
> architectures that have masks smaller than ULLONG_MAX.

Right. This only worked on x86 because the mask is there ULLONG_MAX for all
VDSO capable clocksources, i.e. that ever worked just by chance.

As we talked about that already yesterday, I tested this on a couple of
machines and as expected the outcome is uarch dependent. Minimal deviations
to both sides and some machines do not show any change at all. I doubt it's
possible to come up with a solution which makes all uarchs go faster
magically.

Though, thinking about it, we could remove the mask operation completely on
X86. /me runs tests

Thanks,

tglx


> Signed-off-by: Vincenzo Frascino <[email protected]>
> ---
> lib/vdso/gettimeofday.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
> index ef28cc5d7bff..ee1221ba1d32 100644
> --- a/lib/vdso/gettimeofday.c
> +++ b/lib/vdso/gettimeofday.c
> @@ -35,12 +35,12 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
>
> do {
> seq = vdso_read_begin(vd);
> - cycles = __arch_get_hw_counter(vd->clock_mode) &
> - vd->mask;
> + cycles = __arch_get_hw_counter(vd->clock_mode);
> ns = vdso_ts->nsec;
> last = vd->cycle_last;
> if (unlikely((s64)cycles < 0))
> return clock_gettime_fallback(clk, ts);
> + cycles &= vd->mask;
> if (cycles > last)
> ns += (cycles - last) * vd->mult;
> ns >>= vd->shift;
> --
> 2.22.0
>
>

2019-06-25 19:47:31

by Vincenzo Frascino

[permalink] [raw]
Subject: [PATCH] arm64: vdso: Fix compilation with clang < 8

clang versions prior to 8 do not support -mcmodel=tiny.

Add a check to the vDSO Makefile for arm64 to remove the flag when these
versions of the compiler are detected.

Signed-off-by: Vincenzo Frascino <[email protected]>
Reported-by: Qian Cai <[email protected]>
Tested-by: Qian Cai <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index ec81d28aeb5d..5154f50aff2d 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -38,6 +38,11 @@ else
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
endif

+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n

--
2.22.0

2019-06-25 19:56:53

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

On Tue, 25 Jun 2019, Thomas Gleixner wrote:

> On Tue, 25 Jun 2019, Vincenzo Frascino wrote:
>
> CC+ Andy
>
> > do_hres() in the vDSO generic library masks the hw counter value
> > immediately after reading it.
> >
> > Postpone the mask application after checking if the syscall fallback is
> > enabled, in order to be able to detect a possible fallback for the
> > architectures that have masks smaller than ULLONG_MAX.
>
> Right. This only worked on x86 because the mask is there ULLONG_MAX for all
> VDSO capable clocksources, i.e. that ever worked just by chance.
>
> As we talked about that already yesterday, I tested this on a couple of
> machines and as expected the outcome is uarch dependent. Minimal deviations
> to both sides and some machines do not show any change at all. I doubt it's
> possible to come up with a solution which makes all uarchs go faster
> magically.
>
> Though, thinking about it, we could remove the mask operation completely on
> X86. /me runs tests

Unsurprisingly the results vary. Two uarchs do not care, but they did not
care about moving the mask either. The other two gain performance and the
last one falls back to the state before moving the mask. So in general it
looks like a worthwhile optimization.

Thanks,

tglx


2019-06-25 20:16:05

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

On Tue, Jun 25, 2019 at 11:27 AM Thomas Gleixner <[email protected]> wrote:
>
> On Tue, 25 Jun 2019, Thomas Gleixner wrote:
>
> > On Tue, 25 Jun 2019, Vincenzo Frascino wrote:
> >
> > CC+ Andy
> >
> > > do_hres() in the vDSO generic library masks the hw counter value
> > > immediately after reading it.
> > >
> > > Postpone the mask application after checking if the syscall fallback is
> > > enabled, in order to be able to detect a possible fallback for the
> > > architectures that have masks smaller than ULLONG_MAX.
> >
> > Right. This only worked on x86 because the mask is there ULLONG_MAX for all
> > VDSO capable clocksources, i.e. that ever worked just by chance.
> >
> > As we talked about that already yesterday, I tested this on a couple of
> > machines and as expected the outcome is uarch dependent. Minimal deviations
> > to both sides and some machines do not show any change at all. I doubt it's
> > possible to come up with a solution which makes all uarchs go faster
> > magically.
> >
> > Though, thinking about it, we could remove the mask operation completely on
> > X86. /me runs tests
>
> Unsurprisingly the results vary. Two uarchs do not care, but they did not
> care about moving the mask either. The other two gain performance and the
> last one falls back to the state before moving the mask. So in general it
> looks like a worthwhile optimization.
>

At one point, I contemplated a different approach: have the "get the
counter" routine return 0 and then do if (unlikely(cycles <= last))
goto fallback. This will remove one branch from the hot path. I got
dubious results when I tried benchmarking it, probably because the
branch in question was always correctly predicted.

2019-06-25 22:26:10

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

On Tue, 25 Jun 2019, Andy Lutomirski wrote:
> On Tue, Jun 25, 2019 at 11:27 AM Thomas Gleixner <[email protected]> wrote:
> >
> > On Tue, 25 Jun 2019, Thomas Gleixner wrote:
> >
> > > On Tue, 25 Jun 2019, Vincenzo Frascino wrote:
> > >
> > > CC+ Andy
> > >
> > > > do_hres() in the vDSO generic library masks the hw counter value
> > > > immediately after reading it.
> > > >
> > > > Postpone the mask application after checking if the syscall fallback is
> > > > enabled, in order to be able to detect a possible fallback for the
> > > > architectures that have masks smaller than ULLONG_MAX.
> > >
> > > Right. This only worked on x86 because the mask is there ULLONG_MAX for all
> > > VDSO capable clocksources, i.e. that ever worked just by chance.
> > >
> > > As we talked about that already yesterday, I tested this on a couple of
> > > machines and as expected the outcome is uarch dependent. Minimal deviations
> > > to both sides and some machines do not show any change at all. I doubt it's
> > > possible to come up with a solution which makes all uarchs go faster
> > > magically.
> > >
> > > Though, thinking about it, we could remove the mask operation completely on
> > > X86. /me runs tests
> >
> > Unsurprisingly the results vary. Two uarchs do not care, but they did not
> > care about moving the mask either. The other two gain performance and the
> > last one falls back to the state before moving the mask. So in general it
> > looks like a worthwhile optimization.
> >
>
> At one point, I contemplated a different approach: have the "get the
> counter" routine return 0 and then do if (unlikely(cycles <= last))
> goto fallback. This will remove one branch from the hot path. I got
> dubious results when I tried benchmarking it, probably because the
> branch in question was always correctly predicted.

Just tried and it's the same thing. One drops, one does not care and one
gains. Did not test the other two as they are asleep already. There is no
universal cure for this I fear. I even tried a uarch optimized build a few
days ago which came out worse than the generic one...

The issue in that code path is the fencing of the TSC read. That seems to
screw up every uarch in a different way.

If you have no objections I'll queue this change (moving the mask) along
with the other two ARM64 ones to unbreak the fallback path for these errata
inflicted machines.

Thanks,

tglx

2019-06-26 06:41:12

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

On Tue, 25 Jun 2019, Thomas Gleixner wrote:
> On Tue, 25 Jun 2019, Vincenzo Frascino wrote:
> > do_hres() in the vDSO generic library masks the hw counter value
> > immediately after reading it.
> >
> > Postpone the mask application after checking if the syscall fallback is
> > enabled, in order to be able to detect a possible fallback for the
> > architectures that have masks smaller than ULLONG_MAX.
>
> Right. This only worked on x86 because the mask is there ULLONG_MAX for all
> VDSO capable clocksources, i.e. that ever worked just by chance.

But it's actually worse than that:

> > + cycles &= vd->mask;
> > if (cycles > last)
> > ns += (cycles - last) * vd->mult;
> > ns >>= vd->shift;

This is broken for any clocksource which can legitimately wrap around. The
core timekeeping does the right thing:

(cycles - last) & mask

That makes sure that a wraparound is correctly handled. With the above the
wrap around would be ignored due to

if (cycles > last)

Stupid me. I should have added big fat comments to the x86 vdso why this
all works correctly and only correctly for the x86 crud. That was part of
squeezing the last cycles out of the vdso.

Sorry for not noticing earlier. Working on a fix.

Thanks,

tglx


2019-06-26 09:27:41

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH 1/3] lib/vdso: Delay mask application in do_hres()

Hi Thomas,

On 26/06/2019 07:38, Thomas Gleixner wrote:
> On Tue, 25 Jun 2019, Thomas Gleixner wrote:
>> On Tue, 25 Jun 2019, Vincenzo Frascino wrote:
>>> do_hres() in the vDSO generic library masks the hw counter value
>>> immediately after reading it.
>>>
>>> Postpone the mask application after checking if the syscall fallback is
>>> enabled, in order to be able to detect a possible fallback for the
>>> architectures that have masks smaller than ULLONG_MAX.
>>
>> Right. This only worked on x86 because the mask is there ULLONG_MAX for all
>> VDSO capable clocksources, i.e. that ever worked just by chance.
>
> But it's actually worse than that:
>
>>> + cycles &= vd->mask;
>>> if (cycles > last)
>>> ns += (cycles - last) * vd->mult;
>>> ns >>= vd->shift;
>
> This is broken for any clocksource which can legitimately wrap around. The
> core timekeeping does the right thing:
>
> (cycles - last) & mask
>
> That makes sure that a wraparound is correctly handled. With the above the
> wrap around would be ignored due to
>
> if (cycles > last)
>

You are right. Thanks for spotting it.


...

--
Regards,
Vincenzo

2019-06-26 10:02:48

by Thomas Gleixner

[permalink] [raw]
Subject: lib/vdso: Make delta calculation work correctly

The x86 vdso implementation on which the generic vdso library is based on
has subtle (unfortunately undocumented) twists:

1) The code assumes that the clocksource mask is U64_MAX which means that
no bits are masked. Which is true for any valid x86 VDSO clocksource.
Stupidly it still did the mask operation for no reason and at the wrong
place right after reading the clocksource.

2) It contains a sanity check to catch the case where slightly
unsynchronized TSC values can be observed which would cause the delta
calculation to make a huge jump. It therefore checks whether the
current TSC value is larger than the value on which the current
conversion is based on. If it's not larger the base value is used to
prevent time jumps.

#1 Is not only stupid for the X86 case because it does the masking for no
reason it is also completely wrong for clocksources with a smaller mask
which can legitimately wrap around during a conversion period. The core
timekeeping code does it correct by applying the mask after the delta
calculation:

(now - base) & mask

#2 is equally broken for clocksources which have smaller masks and can wrap
around during a conversion period because there the now > base check is
just wrong and causes stale time stamps and time going backwards issues.

Unbreak it by:

1) Removing the mask operation from the clocksource read which makes the
fallback detection work for all clocksources

2) Replacing the conditional delta calculation with an overrideable inline
function.

#2 could reuse clocksource_delta() from the timekeeping code but that
results in a significant performance hit for the x86 VDSO. The timekeeping
core code must have the non optimized version as it has to operate
correctly with clocksources which have smaller masks as well to handle the
case where TSC is discarded as timekeeper clocksource and replaced by HPET
or pmtimer. For the VDSO there is no replacement clocksource. If TSC is
unusable the syscall is enforced which does the right thing.

To accommodate the needs of various architectures provide an overrideable
inline function which defaults to the regular delta calculation with
masking:

(now - base) & mask

Override it for x86 with the non-masking and checking version.

This unbreaks the ARM64 syscall fallback operation, allows to use
clocksources with arbitrary width and preserves the performance
optimization for x86.

Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/vdso/gettimeofday.h | 27 +++++++++++++++++++++++++++
lib/vdso/gettimeofday.c | 19 +++++++++++++++----
2 files changed, 42 insertions(+), 4 deletions(-)

--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -229,6 +229,33 @@ static __always_inline const struct vdso
return __vdso_data;
}

+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @last. If not then use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ if (cycles > last)
+ return (cycles - last) * mult;
+ return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
#endif /* !__ASSEMBLY__ */

#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -26,6 +26,18 @@
#include <asm/vdso/gettimeofday.h>
#endif /* ENABLE_COMPAT_VDSO */

+#ifndef vdso_calc_delta
+/*
+ * Default implementation which works for all sane clocksources. That
+ * obviously excludes x86/TSC.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return ((cyles - last) & mask) * mult;
+}
+#endif
+
static int do_hres(const struct vdso_data *vd, clockid_t clk,
struct __kernel_timespec *ts)
{
@@ -35,14 +47,13 @@ static int do_hres(const struct vdso_dat

do {
seq = vdso_read_begin(vd);
- cycles = __arch_get_hw_counter(vd->clock_mode) &
- vd->mask;
+ cycles = __arch_get_hw_counter(vd->clock_mode);
ns = vdso_ts->nsec;
last = vd->cycle_last;
if (unlikely((s64)cycles < 0))
return clock_gettime_fallback(clk, ts);
- if (cycles > last)
- ns += (cycles - last) * vd->mult;
+
+ ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
ns >>= vd->shift;
sec = vdso_ts->sec;
} while (unlikely(vdso_read_retry(vd, seq)));

2019-06-26 11:09:02

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: lib/vdso: Make delta calculation work correctly

Hi Thomas,

On 26/06/2019 11:02, Thomas Gleixner wrote:
> The x86 vdso implementation on which the generic vdso library is based on
> has subtle (unfortunately undocumented) twists:
>
> 1) The code assumes that the clocksource mask is U64_MAX which means that
> no bits are masked. Which is true for any valid x86 VDSO clocksource.
> Stupidly it still did the mask operation for no reason and at the wrong
> place right after reading the clocksource.
>
> 2) It contains a sanity check to catch the case where slightly
> unsynchronized TSC values can be observed which would cause the delta
> calculation to make a huge jump. It therefore checks whether the
> current TSC value is larger than the value on which the current
> conversion is based on. If it's not larger the base value is used to
> prevent time jumps.
>
> #1 Is not only stupid for the X86 case because it does the masking for no
> reason it is also completely wrong for clocksources with a smaller mask
> which can legitimately wrap around during a conversion period. The core
> timekeeping code does it correct by applying the mask after the delta
> calculation:
>
> (now - base) & mask
>
> #2 is equally broken for clocksources which have smaller masks and can wrap
> around during a conversion period because there the now > base check is
> just wrong and causes stale time stamps and time going backwards issues.
>
> Unbreak it by:
>
> 1) Removing the mask operation from the clocksource read which makes the
> fallback detection work for all clocksources
>
> 2) Replacing the conditional delta calculation with an overrideable inline
> function.
>
> #2 could reuse clocksource_delta() from the timekeeping code but that
> results in a significant performance hit for the x86 VDSO. The timekeeping
> core code must have the non optimized version as it has to operate
> correctly with clocksources which have smaller masks as well to handle the
> case where TSC is discarded as timekeeper clocksource and replaced by HPET
> or pmtimer. For the VDSO there is no replacement clocksource. If TSC is
> unusable the syscall is enforced which does the right thing.
>
> To accommodate the needs of various architectures provide an overrideable
> inline function which defaults to the regular delta calculation with
> masking:
>
> (now - base) & mask
>
> Override it for x86 with the non-masking and checking version.
>
> This unbreaks the ARM64 syscall fallback operation, allows to use
> clocksources with arbitrary width and preserves the performance
> optimization for x86.
>
> Signed-off-by: Thomas Gleixner <[email protected]>

A part a typo that leads to compilation errors on non-x86 platforms the rest
looks fine by me.

I tested it on arm64 and behaves correctly.

With this:

Reviewed-by: Vincenzo Frascino <[email protected]>

> ---
> arch/x86/include/asm/vdso/gettimeofday.h | 27 +++++++++++++++++++++++++++
> lib/vdso/gettimeofday.c | 19 +++++++++++++++----
> 2 files changed, 42 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/include/asm/vdso/gettimeofday.h
> +++ b/arch/x86/include/asm/vdso/gettimeofday.h
> @@ -229,6 +229,33 @@ static __always_inline const struct vdso
> return __vdso_data;
> }
>
> +/*
> + * x86 specific delta calculation.
> + *
> + * The regular implementation assumes that clocksource reads are globally
> + * monotonic. The TSC can be slightly off across sockets which can cause
> + * the regular delta calculation (@cycles - @last) to return a huge time
> + * jump.
> + *
> + * Therefore it needs to be verified that @cycles are greater than
> + * @last. If not then use @last, which is the base time of the current
> + * conversion period.
> + *
> + * This variant also removes the masking of the subtraction because the
> + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
> + * which would result in a pointless operation. The compiler cannot
> + * optimize it away as the mask comes from the vdso data and is not compile
> + * time constant.
> + */
> +static __always_inline
> +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> +{
> + if (cycles > last)
> + return (cycles - last) * mult;
> + return 0;
> +}
> +#define vdso_calc_delta vdso_calc_delta
> +
> #endif /* !__ASSEMBLY__ */
>
> #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
> --- a/lib/vdso/gettimeofday.c
> +++ b/lib/vdso/gettimeofday.c
> @@ -26,6 +26,18 @@
> #include <asm/vdso/gettimeofday.h>
> #endif /* ENABLE_COMPAT_VDSO */
>
> +#ifndef vdso_calc_delta
> +/*
> + * Default implementation which works for all sane clocksources. That
> + * obviously excludes x86/TSC.
> + */
> +static __always_inline
> +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> +{
> + return ((cyles - last) & mask) * mult;

Typo here:

s/cyles/cycles/

> +}
> +#endif
> +
> static int do_hres(const struct vdso_data *vd, clockid_t clk,
> struct __kernel_timespec *ts)
> {
> @@ -35,14 +47,13 @@ static int do_hres(const struct vdso_dat
>
> do {
> seq = vdso_read_begin(vd);
> - cycles = __arch_get_hw_counter(vd->clock_mode) &
> - vd->mask;
> + cycles = __arch_get_hw_counter(vd->clock_mode);
> ns = vdso_ts->nsec;
> last = vd->cycle_last;
> if (unlikely((s64)cycles < 0))
> return clock_gettime_fallback(clk, ts);
> - if (cycles > last)
> - ns += (cycles - last) * vd->mult;
> +
> + ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
> ns >>= vd->shift;
> sec = vdso_ts->sec;
> } while (unlikely(vdso_read_retry(vd, seq)));
>

--
Regards,
Vincenzo

2019-06-26 11:37:16

by Vincenzo Frascino

[permalink] [raw]
Subject: [PATCH v2] arm64: vdso: Fix compilation with clang older than 8

clang versions older than 8 do not support -mcmodel=tiny.

Add a check to the vDSO Makefile for arm64 to remove the flag when
these versions of the compiler are detected.

Signed-off-by: Vincenzo Frascino <[email protected]>
Reported-by: Qian Cai <[email protected]>
Tested-by: Qian Cai <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index ec81d28aeb5d..4ab863045188 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -38,6 +38,13 @@ else
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
endif

+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+ ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+ CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+ endif
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n

--
2.22.0

Subject: [tip:timers/vdso] arm64: vdso: Remove unnecessary asm-offsets.c definitions

Commit-ID: 94fee4d43752b6022428d9de402632904968e15b
Gitweb: https://git.kernel.org/tip/94fee4d43752b6022428d9de402632904968e15b
Author: Catalin Marinas <[email protected]>
AuthorDate: Mon, 24 Jun 2019 14:58:12 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 26 Jun 2019 07:28:10 +0200

arm64: vdso: Remove unnecessary asm-offsets.c definitions

Since the VDSO code has moved to C from assembly, there is no need to
define and maintain the corresponding asm offsets.

Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C implementation")
Signed-off-by: Catalin Marinas <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Vincenzo Frascino <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Will Deacon <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Russell King <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Paul Burton <[email protected]>
Cc: Daniel Lezcano <[email protected]>
Cc: Mark Salyzyn <[email protected]>
Cc: Peter Collingbourne <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Dmitry Safonov <[email protected]>
Cc: Rasmus Villemoes <[email protected]>
Cc: Huw Davies <[email protected]>
Cc: Shijith Thotton <[email protected]>
Cc: Andre Przywara <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/arm64/kernel/asm-offsets.c | 39 ---------------------------------------
1 file changed, 39 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e6f7409a78a4..214685760e1c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -14,7 +14,6 @@
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/suspend.h>
-#include <vdso/datapage.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
@@ -86,44 +85,6 @@ int main(void)
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
- DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
- DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
- DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
- DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
- DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
- DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
- DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
- DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
- DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
- DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
- DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
- DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
- DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
- DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
- DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
- DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
- DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
- DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
- DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();

Subject: [tip:timers/vdso] lib/vdso: Make delta calculation work correctly

Commit-ID: 9d90b93bf325e015bbae31b83f16da5e4e17effa
Gitweb: https://git.kernel.org/tip/9d90b93bf325e015bbae31b83f16da5e4e17effa
Author: Thomas Gleixner <[email protected]>
AuthorDate: Wed, 26 Jun 2019 12:02:00 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 26 Jun 2019 14:26:53 +0200

lib/vdso: Make delta calculation work correctly

The x86 vdso implementation on which the generic vdso library is based on
has subtle (unfortunately undocumented) twists:

1) The code assumes that the clocksource mask is U64_MAX which means that
no bits are masked. Which is true for any valid x86 VDSO clocksource.
Stupidly it still did the mask operation for no reason and at the wrong
place right after reading the clocksource.

2) It contains a sanity check to catch the case where slightly
unsynchronized TSC values can be observed which would cause the delta
calculation to make a huge jump. It therefore checks whether the
current TSC value is larger than the value on which the current
conversion is based on. If it's not larger the base value is used to
prevent time jumps.

#1 Is not only stupid for the X86 case because it does the masking for no
reason it is also completely wrong for clocksources with a smaller mask
which can legitimately wrap around during a conversion period. The core
timekeeping code does it correct by applying the mask after the delta
calculation:

(now - base) & mask

#2 is equally broken for clocksources which have smaller masks and can wrap
around during a conversion period because there the now > base check is
just wrong and causes stale time stamps and time going backwards issues.

Unbreak it by:

1) Removing the mask operation from the clocksource read which makes the
fallback detection work for all clocksources

2) Replacing the conditional delta calculation with a overrideable inline
function.

#2 could reuse clocksource_delta() from the timekeeping code but that
results in a significant performance hit for the x86 VSDO. The timekeeping
core code must have the non optimized version as it has to operate
correctly with clocksources which have smaller masks as well to handle the
case where TSC is discarded as timekeeper clocksource and replaced by HPET
or pmtimer. For the VDSO there is no replacement clocksource. If TSC is
unusable the syscall is enforced which does the right thing.

To accommodate to the needs of various architectures provide an
override-able inline function which defaults to the regular delta
calculation with masking:

(now - base) & mask

Override it for x86 with the non-masking and checking version.

This unbreaks the ARM64 syscall fallback operation, allows to use
clocksources with arbitrary width and preserves the performance
optimization for x86.

Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Vincenzo Frascino <[email protected]>
Cc: [email protected]
Cc: LAK <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Will Deacon <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: [email protected]
Cc: Ralf Baechle <[email protected]>
Cc: [email protected]
Cc: Daniel Lezcano <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/include/asm/vdso/gettimeofday.h | 27 +++++++++++++++++++++++++++
lib/vdso/gettimeofday.c | 19 +++++++++++++++----
2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 5b63f1f78a1f..a14039a59abd 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -229,6 +229,33 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
return __vdso_data;
}

+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @last. If not then use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ if (cycles > last)
+ return (cycles - last) * mult;
+ return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
#endif /* !__ASSEMBLY__ */

#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index ef28cc5d7bff..2d1c1f241fd9 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -26,6 +26,18 @@
#include <asm/vdso/gettimeofday.h>
#endif /* ENABLE_COMPAT_VDSO */

+#ifndef vdso_calc_delta
+/*
+ * Default implementation which works for all sane clocksources. That
+ * obviously excludes x86/TSC.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return ((cycles - last) & mask) * mult;
+}
+#endif
+
static int do_hres(const struct vdso_data *vd, clockid_t clk,
struct __kernel_timespec *ts)
{
@@ -35,14 +47,13 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,

do {
seq = vdso_read_begin(vd);
- cycles = __arch_get_hw_counter(vd->clock_mode) &
- vd->mask;
+ cycles = __arch_get_hw_counter(vd->clock_mode);
ns = vdso_ts->nsec;
last = vd->cycle_last;
if (unlikely((s64)cycles < 0))
return clock_gettime_fallback(clk, ts);
- if (cycles > last)
- ns += (cycles - last) * vd->mult;
+
+ ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
ns >>= vd->shift;
sec = vdso_ts->sec;
} while (unlikely(vdso_read_retry(vd, seq)));

Subject: [tip:timers/vdso] arm64: Fix __arch_get_hw_counter() implementation

Commit-ID: 27e11a9fe2e2e7e0d13f854e89a71e488678fb17
Gitweb: https://git.kernel.org/tip/27e11a9fe2e2e7e0d13f854e89a71e488678fb17
Author: Vincenzo Frascino <[email protected]>
AuthorDate: Tue, 25 Jun 2019 17:18:03 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 26 Jun 2019 14:26:54 +0200

arm64: Fix __arch_get_hw_counter() implementation

Provide the following fixes for the __arch_get_hw_counter()
implementation on arm64:
- Fallback on syscall when an unstable counter is detected.
- Introduce isb()s before and after the counter read to avoid
speculation of the counter value and of the seq lock
respectively.
The second isb() is a temporary solution that will be revisited
in 5.3-rc1.

These fixes restore the semantics that __arch_counter_get_cntvct()
had on arm64.

Signed-off-by: Vincenzo Frascino <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 447ef417de45..b08f476b72b4 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -10,6 +10,8 @@
#include <asm/unistd.h>
#include <uapi/linux/time.h>

+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
#define VDSO_HAS_CLOCK_GETRES 1

static __always_inline
@@ -68,7 +70,24 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
{
u64 res;

+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();

return res;
}

Subject: [tip:timers/vdso] arm64: compat: Fix __arch_get_hw_counter() implementation

Commit-ID: 6241c4dc6ec56a7627b972959da8b492b765b209
Gitweb: https://git.kernel.org/tip/6241c4dc6ec56a7627b972959da8b492b765b209
Author: Vincenzo Frascino <[email protected]>
AuthorDate: Tue, 25 Jun 2019 17:18:04 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 26 Jun 2019 14:26:54 +0200

arm64: compat: Fix __arch_get_hw_counter() implementation

Provide the following fixes for the __arch_get_hw_counter()
implementation on arm64:
- Fallback on syscall when an unstable counter is detected.
- Introduce isb()s before and after the counter read to avoid
speculation of the counter value and of the seq lock
respectively.
The second isb() is a temporary solution that will be revisited
in 5.3-rc1.

These fixes restore the semantics that __arch_counter_get_cntvct()
had on arm64.

Signed-off-by: Vincenzo Frascino <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/arm64/include/asm/vdso/compat_gettimeofday.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index 93dbd935b66d..f4812777f5c5 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -12,6 +12,8 @@

#include <asm/vdso/compat_barrier.h>

+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
#define VDSO_HAS_CLOCK_GETRES 1

static __always_inline
@@ -74,8 +76,24 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
{
u64 res;

+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
isb();
asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();

return res;
}

Subject: [tip:timers/vdso] arm64: vdso: Fix compilation with clang older than 8

Commit-ID: 3acf4be235280f14d838581a750532219d67facc
Gitweb: https://git.kernel.org/tip/3acf4be235280f14d838581a750532219d67facc
Author: Vincenzo Frascino <[email protected]>
AuthorDate: Wed, 26 Jun 2019 12:36:32 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 26 Jun 2019 14:26:55 +0200

arm64: vdso: Fix compilation with clang older than 8

clang versions older than 8 do not support -mcmodel=tiny.

Add a check to the vDSO Makefile for arm64 to remove the flag when
these versions of the compiler are detected.

Reported-by: Qian Cai <[email protected]>
Signed-off-by: Vincenzo Frascino <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Qian Cai <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]

---
arch/arm64/kernel/vdso/Makefile | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index ec81d28aeb5d..4ab863045188 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -38,6 +38,13 @@ else
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
endif

+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+ ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+ CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+ endif
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n

2019-06-26 13:29:15

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Dave,

On 25/06/2019 16:33, Dave Martin wrote:
> On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
>> To take advantage of the commonly defined vdso interface for
>> gettimeofday the architectural code requires an adaptation.
>>
>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>>
>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
>> and CLOCK_TAI.
>>
>> Cc: Catalin Marinas <[email protected]>
>> Cc: Will Deacon <[email protected]>
>> Signed-off-by: Vincenzo Frascino <[email protected]>
>> Tested-by: Shijith Thotton <[email protected]>
>> Tested-by: Andre Przywara <[email protected]>
>
> [...]
>
>> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
>> new file mode 100644
>> index 000000000000..bc3cb6738051
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
>> @@ -0,0 +1,86 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2018 ARM Limited
>> + */
>> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
>> +#define __ASM_VDSO_GETTIMEOFDAY_H
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <asm/unistd.h>
>> +#include <uapi/linux/time.h>
>> +
>> +#define VDSO_HAS_CLOCK_GETRES 1
>> +
>> +static __always_inline int gettimeofday_fallback(
>> + struct __kernel_old_timeval *_tv,
>> + struct timezone *_tz)
>
> Out of interest, does this need to be __always_inline?
>

It is a design choice. Philosophically, I prefer to control and reduce the scope
of the decisions the compiler has to make in order to not have surprises.

>> +{
>> + register struct timezone *tz asm("x1") = _tz;
>> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
>> + register long ret asm ("x0");
>> + register long nr asm("x8") = __NR_gettimeofday;
>> +
>> + asm volatile(
>> + " svc #0\n"
>
> Can inlining of this function result in non-trivial expressions being
> substituted for _tz or _tv?
>
> A function call can clobber register asm vars that are assigned to the
> caller-save registers or that the PCS uses for function arguments, and
> the situations where this can happen are poorly defined AFAICT. There's
> also no reliable way to detect at build time whether the compiler has
> done this, and no robust way to stop if happening.
>
> (IMHO the compiler is wrong to do this, but it's been that way for ever,
> and I think I saw GCC 9 show this behaviour recently when I was
> investigating something related.)
>
>
> To be safe, it's better to put this out of line, or remove the reg asm()
> specifiers, mark x0-x18 and lr as clobbered here (so that the compiler
> doesn't map arguments to them), and put movs in the asm to move things
> into the right registers. The syscall number can be passed with an "i"
> constraint. (And yes, this sucks.)
>
> If the code this is inlined in is simple enough though, we can be fairly
> confident of getting away with it.
>

I took very seriously what you are mentioning here because I think that
robustness of the code comes before than everything especially in the kernel and
I carried on some experiments to try to verify if in this case is safe to assume
that the compiler is doing the right thing.

Based on my investigation and on previous observations of the generation of the
vDSO library, I can conclude that the approach seems safe due to the fact that
the usage of this code is very limited, the code itself is simple enough and
that gcc would inline this code anyway based on the current compilation options.

The experiment that I did was to define some self-contained code that tries to
mimic what you are describing and compile it with 3 different versions of gcc
(6.4, 8.1 and 8.3) and in all the three cases the behavior seems correct.

Code:
=====

typedef int ssize_t;
typedef int size_t;

static int my_strlen(const char *s)
{
int i = 0;

while (s[i] == '\0')
i++;

return i;
}

static inline ssize_t my_syscall(int fd, const void *buf, size_t count)
{
register ssize_t arg1 asm ("x0") = fd;
register const void *arg2 asm ("x1") = buf;
register size_t arg3 asm ("x2") = count;

__asm__ volatile (
"mov x8, #64\n"
"svc #0\n"
: "=&r" (arg1)
: "r" (arg2), "r" (arg3)
: "x8"
);

return arg1;
}

void sys_caller(const char *s)
{
my_syscall(1, s, my_strlen(s));
}


GCC 8.3.0:
==========

main.8.3.0.o: file format elf64-littleaarch64


Disassembly of section .text:

0000000000000000 <sys_caller>:
0: 39400001 ldrb w1, [x0]
4: 35000161 cbnz w1, 30 <sys_caller+0x30>
8: d2800023 mov x3, #0x1 // #1
c: d1000404 sub x4, x0, #0x1
10: 2a0303e2 mov w2, w3
14: 91000463 add x3, x3, #0x1
18: 38636881 ldrb w1, [x4, x3]
1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
20: aa0003e1 mov x1, x0
24: d2800808 mov x8, #0x40 // #64
28: d4000001 svc #0x0
2c: d65f03c0 ret
30: 52800002 mov w2, #0x0 // #0
34: 17fffffb b 20 <sys_caller+0x20>


GCC 8.1.0:
==========

main.8.1.0.o: file format elf64-littleaarch64


Disassembly of section .text:

0000000000000000 <sys_caller>:
0: 39400001 ldrb w1, [x0]
4: 35000161 cbnz w1, 30 <sys_caller+0x30>
8: d2800023 mov x3, #0x1 // #1
c: d1000404 sub x4, x0, #0x1
10: 2a0303e2 mov w2, w3
14: 91000463 add x3, x3, #0x1
18: 38636881 ldrb w1, [x4, x3]
1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
20: aa0003e1 mov x1, x0
24: d2800808 mov x8, #0x40 // #64
28: d4000001 svc #0x0
2c: d65f03c0 ret
30: 52800002 mov w2, #0x0 // #0
34: 17fffffb b 20 <sys_caller+0x20>



GCC 6.4.0:
==========

main.6.4.0.o: file format elf64-littleaarch64


Disassembly of section .text:

0000000000000000 <sys_caller>:
0: 39400001 ldrb w1, [x0]
4: 35000161 cbnz w1, 30 <sys_caller+0x30>
8: d2800023 mov x3, #0x1 // #1
c: d1000404 sub x4, x0, #0x1
10: 2a0303e2 mov w2, w3
14: 91000463 add x3, x3, #0x1
18: 38636881 ldrb w1, [x4, x3]
1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
20: aa0003e1 mov x1, x0
24: d2800808 mov x8, #0x40 // #64
28: d4000001 svc #0x0
2c: d65f03c0 ret
30: 52800002 mov w2, #0x0 // #0
34: 17fffffb b 20 <sys_caller+0x20>


> [...]
>
> Cheers
> ---Dave
>

--
Regards,
Vincenzo

2019-06-26 16:15:19

by Dave Martin

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On Wed, Jun 26, 2019 at 02:27:59PM +0100, Vincenzo Frascino wrote:
> Hi Dave,
>
> On 25/06/2019 16:33, Dave Martin wrote:
> > On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
> >> To take advantage of the commonly defined vdso interface for
> >> gettimeofday the architectural code requires an adaptation.
> >>
> >> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
> >>
> >> With the new implementation arm64 gains support for CLOCK_BOOTTIME
> >> and CLOCK_TAI.
> >>
> >> Cc: Catalin Marinas <[email protected]>
> >> Cc: Will Deacon <[email protected]>
> >> Signed-off-by: Vincenzo Frascino <[email protected]>
> >> Tested-by: Shijith Thotton <[email protected]>
> >> Tested-by: Andre Przywara <[email protected]>
> >
> > [...]
> >
> >> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
> >> new file mode 100644
> >> index 000000000000..bc3cb6738051
> >> --- /dev/null
> >> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
> >> @@ -0,0 +1,86 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * Copyright (C) 2018 ARM Limited
> >> + */
> >> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
> >> +#define __ASM_VDSO_GETTIMEOFDAY_H
> >> +
> >> +#ifndef __ASSEMBLY__
> >> +
> >> +#include <asm/unistd.h>
> >> +#include <uapi/linux/time.h>
> >> +
> >> +#define VDSO_HAS_CLOCK_GETRES 1
> >> +
> >> +static __always_inline int gettimeofday_fallback(
> >> + struct __kernel_old_timeval *_tv,
> >> + struct timezone *_tz)
> >
> > Out of interest, does this need to be __always_inline?
> >
>
> It is a design choice. Philosophically, I prefer to control and reduce the scope
> of the decisions the compiler has to make in order to not have surprises.
>
> >> +{
> >> + register struct timezone *tz asm("x1") = _tz;
> >> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
> >> + register long ret asm ("x0");
> >> + register long nr asm("x8") = __NR_gettimeofday;
> >> +
> >> + asm volatile(
> >> + " svc #0\n"
> >
> > Can inlining of this function result in non-trivial expressions being
> > substituted for _tz or _tv?
> >
> > A function call can clobber register asm vars that are assigned to the
> > caller-save registers or that the PCS uses for function arguments, and
> > the situations where this can happen are poorly defined AFAICT. There's
> > also no reliable way to detect at build time whether the compiler has
> > done this, and no robust way to stop if happening.
> >
> > (IMHO the compiler is wrong to do this, but it's been that way for ever,
> > and I think I saw GCC 9 show this behaviour recently when I was
> > investigating something related.)
> >
> >
> > To be safe, it's better to put this out of line, or remove the reg asm()
> > specifiers, mark x0-x18 and lr as clobbered here (so that the compiler
> > doesn't map arguments to them), and put movs in the asm to move things
> > into the right registers. The syscall number can be passed with an "i"
> > constraint. (And yes, this sucks.)
> >
> > If the code this is inlined in is simple enough though, we can be fairly
> > confident of getting away with it.
> >
>
> I took very seriously what you are mentioning here because I think
> that robustness of the code comes before than everything especially
> in the kernel and I carried on some experiments to try to verify if
> in this case is safe to assume that the compiler is doing the right
> thing.
>
> Based on my investigation and on previous observations of the
> generation of the vDSO library, I can conclude that the approach
> seems safe due to the fact that the usage of this code is very
> limited, the code itself is simple enough and that gcc would inline
> this code anyway based on the current compilation options.

I'd caution about "seems safe". A lot of subtly wrong code not only
seems safe, but _is_ safe in its original context, in practice. Add
some code to the vdso over time though, or tweak the compilation options
at some point in the future, or use a different compiler, and things
could still go wrong.

(Further comments below.)

> The experiment that I did was to define some self-contained code that
> tries to mimic what you are describing and compile it with 3
> different versions of gcc (6.4, 8.1 and 8.3) and in all the three
> cases the behavior seems correct.
>
> Code:
> =====
>
> typedef int ssize_t;
> typedef int size_t;
>
> static int my_strlen(const char *s)
> {
> int i = 0;
>
> while (s[i] == '\0')
> i++;
>
> return i;
> }
>
> static inline ssize_t my_syscall(int fd, const void *buf, size_t count)
> {
> register ssize_t arg1 asm ("x0") = fd;
> register const void *arg2 asm ("x1") = buf;
> register size_t arg3 asm ("x2") = count;
>
> __asm__ volatile (
> "mov x8, #64\n"
> "svc #0\n"
> : "=&r" (arg1)
> : "r" (arg2), "r" (arg3)
> : "x8"
> );
>
> return arg1;
> }
>
> void sys_caller(const char *s)
> {
> my_syscall(1, s, my_strlen(s));
> }
>
>
> GCC 8.3.0:
> ==========
>
> main.8.3.0.o: file format elf64-littleaarch64
>
>
> Disassembly of section .text:
>
> 0000000000000000 <sys_caller>:
> 0: 39400001 ldrb w1, [x0]
> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
> 8: d2800023 mov x3, #0x1 // #1
> c: d1000404 sub x4, x0, #0x1
> 10: 2a0303e2 mov w2, w3
> 14: 91000463 add x3, x3, #0x1
> 18: 38636881 ldrb w1, [x4, x3]
> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
> 20: aa0003e1 mov x1, x0
> 24: d2800808 mov x8, #0x40 // #64
> 28: d4000001 svc #0x0
> 2c: d65f03c0 ret
> 30: 52800002 mov w2, #0x0 // #0
> 34: 17fffffb b 20 <sys_caller+0x20>
>
>
> GCC 8.1.0:
> ==========
>
> main.8.1.0.o: file format elf64-littleaarch64
>
>
> Disassembly of section .text:
>
> 0000000000000000 <sys_caller>:
> 0: 39400001 ldrb w1, [x0]
> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
> 8: d2800023 mov x3, #0x1 // #1
> c: d1000404 sub x4, x0, #0x1
> 10: 2a0303e2 mov w2, w3
> 14: 91000463 add x3, x3, #0x1
> 18: 38636881 ldrb w1, [x4, x3]
> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
> 20: aa0003e1 mov x1, x0
> 24: d2800808 mov x8, #0x40 // #64
> 28: d4000001 svc #0x0
> 2c: d65f03c0 ret
> 30: 52800002 mov w2, #0x0 // #0
> 34: 17fffffb b 20 <sys_caller+0x20>
>
>
>
> GCC 6.4.0:
> ==========
>
> main.6.4.0.o: file format elf64-littleaarch64
>
>
> Disassembly of section .text:
>
> 0000000000000000 <sys_caller>:
> 0: 39400001 ldrb w1, [x0]
> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
> 8: d2800023 mov x3, #0x1 // #1
> c: d1000404 sub x4, x0, #0x1
> 10: 2a0303e2 mov w2, w3
> 14: 91000463 add x3, x3, #0x1
> 18: 38636881 ldrb w1, [x4, x3]
> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
> 20: aa0003e1 mov x1, x0
> 24: d2800808 mov x8, #0x40 // #64
> 28: d4000001 svc #0x0
> 2c: d65f03c0 ret
> 30: 52800002 mov w2, #0x0 // #0
> 34: 17fffffb b 20 <sys_caller+0x20>

Thanks for having a go at this. If the compiler can show the
problematic behaviour, it looks like your code could probably trigger
it, and as you observe, it doesn't trigger.

I am sure I have seen it in the past, but today I am struggling
to tickle the compiler in the right way. My original reproducer may
have involved LTO, but either way I don't still have it :(


The classic example of this (triggered directly and not due to inlining)
would be something like:

int bar(int, int);

void foo(int x, int y)
{
register int x_ asm("r0") = x;
register int y_ asm("r1") = bar(x, y);

asm volatile (
"svc #0"
:: "r" (x_), "r" (y_)
: "memory"
);
}

->

0000000000000000 <foo>:
0: a9bf7bfd stp x29, x30, [sp, #-16]!
4: 910003fd mov x29, sp
8: 94000000 bl 0 <bar>
c: 2a0003e1 mov w1, w0
10: d4000001 svc #0x0
14: a8c17bfd ldp x29, x30, [sp], #16
18: d65f03c0 ret


The gcc documentation is vague and ambiguous about precisely when this
can happen and about how to avoid it.

The case where this behaviour is triggered by inlining an expression
that involves a (possibly implicit) function call seems hard to
reproduce.


However, the workaround is cheap, and to avoid the chance of subtle
intermittent code gen bugs it may be worth it:

void foo(int x, int y)
{
asm volatile (
"mov x0, %0\n\t"
"mov x1, %1\n\t"
"svc #0"
:: "r" (x), "r" (bar(x, y))
: "r0", "r1", "memory"
);
}

->

0000000000000000 <foo>:
0: a9be7bfd stp x29, x30, [sp, #-32]!
4: 910003fd mov x29, sp
8: f9000bf3 str x19, [sp, #16]
c: 2a0003f3 mov w19, w0
10: 94000000 bl 0 <bar>
14: 2a0003e2 mov w2, w0
18: aa1303e0 mov x0, x19
1c: aa0203e1 mov x1, x2
20: d4000001 svc #0x0
24: f9400bf3 ldr x19, [sp, #16]
28: a8c27bfd ldp x29, x30, [sp], #32
2c: d65f03c0 ret


What do you think?

Cheers
---Dave

2019-06-26 19:01:37

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Dave,

thank you for the quick turn around.

On 6/26/19 5:14 PM, Dave Martin wrote:
> On Wed, Jun 26, 2019 at 02:27:59PM +0100, Vincenzo Frascino wrote:
>> Hi Dave,
>>
>> On 25/06/2019 16:33, Dave Martin wrote:
>>> On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
>>>> To take advantage of the commonly defined vdso interface for
>>>> gettimeofday the architectural code requires an adaptation.
>>>>
>>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>>>>
>>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
>>>> and CLOCK_TAI.
>>>>
>>>> Cc: Catalin Marinas <[email protected]>
>>>> Cc: Will Deacon <[email protected]>
>>>> Signed-off-by: Vincenzo Frascino <[email protected]>
>>>> Tested-by: Shijith Thotton <[email protected]>
>>>> Tested-by: Andre Przywara <[email protected]>
>>>
>>> [...]
>>>
>>>> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
>>>> new file mode 100644
>>>> index 000000000000..bc3cb6738051
>>>> --- /dev/null
>>>> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
>>>> @@ -0,0 +1,86 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * Copyright (C) 2018 ARM Limited
>>>> + */
>>>> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
>>>> +#define __ASM_VDSO_GETTIMEOFDAY_H
>>>> +
>>>> +#ifndef __ASSEMBLY__
>>>> +
>>>> +#include <asm/unistd.h>
>>>> +#include <uapi/linux/time.h>
>>>> +
>>>> +#define VDSO_HAS_CLOCK_GETRES 1
>>>> +
>>>> +static __always_inline int gettimeofday_fallback(
>>>> + struct __kernel_old_timeval *_tv,
>>>> + struct timezone *_tz)
>>>
>>> Out of interest, does this need to be __always_inline?
>>>
>>
>> It is a design choice. Philosophically, I prefer to control and reduce the scope
>> of the decisions the compiler has to make in order to not have surprises.
>>
>>>> +{
>>>> + register struct timezone *tz asm("x1") = _tz;
>>>> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
>>>> + register long ret asm ("x0");
>>>> + register long nr asm("x8") = __NR_gettimeofday;
>>>> +
>>>> + asm volatile(
>>>> + " svc #0\n"
>>>
>>> Can inlining of this function result in non-trivial expressions being
>>> substituted for _tz or _tv?
>>>
>>> A function call can clobber register asm vars that are assigned to the
>>> caller-save registers or that the PCS uses for function arguments, and
>>> the situations where this can happen are poorly defined AFAICT. There's
>>> also no reliable way to detect at build time whether the compiler has
>>> done this, and no robust way to stop if happening.
>>>
>>> (IMHO the compiler is wrong to do this, but it's been that way for ever,
>>> and I think I saw GCC 9 show this behaviour recently when I was
>>> investigating something related.)
>>>
>>>
>>> To be safe, it's better to put this out of line, or remove the reg asm()
>>> specifiers, mark x0-x18 and lr as clobbered here (so that the compiler
>>> doesn't map arguments to them), and put movs in the asm to move things
>>> into the right registers. The syscall number can be passed with an "i"
>>> constraint. (And yes, this sucks.)
>>>
>>> If the code this is inlined in is simple enough though, we can be fairly
>>> confident of getting away with it.
>>>
>>
>> I took very seriously what you are mentioning here because I think
>> that robustness of the code comes before than everything especially
>> in the kernel and I carried on some experiments to try to verify if
>> in this case is safe to assume that the compiler is doing the right
>> thing.
>>
>> Based on my investigation and on previous observations of the
>> generation of the vDSO library, I can conclude that the approach
>> seems safe due to the fact that the usage of this code is very
>> limited, the code itself is simple enough and that gcc would inline
>> this code anyway based on the current compilation options.
>
> I'd caution about "seems safe". A lot of subtly wrong code not only
> seems safe, but _is_ safe in its original context, in practice. Add
> some code to the vdso over time though, or tweak the compilation options
> at some point in the future, or use a different compiler, and things
> could still go wrong.
>
> (Further comments below.)
>

Allow me to provide a clarification on "seems safe" vs "is safe": my approach
"seems safe" because I am providing empirical evidence to support my thesis, but
I guess we both know that there is no simple way to prove in one way or another
that the problem has a complete solution.
The proposed problem involves suppositions on potential future code additions
and changes of behavior of the compiler that I can neither control nor prevent.
In other words, I can comment and propose solutions only based on the current
status of the things, and it is what my analysis targets, not on what will
happen in future.

I will reply point by point below.

>> The experiment that I did was to define some self-contained code that
>> tries to mimic what you are describing and compile it with 3
>> different versions of gcc (6.4, 8.1 and 8.3) and in all the tree
>> cases the behavior seems correct.
>>
>> Code:
>> =====
>>
>> typedef int ssize_t;
>> typedef int size_t;
>>
>> static int my_strlen(const char *s)
>> {
>> int i = 0;
>>
>> while (s[i] == '\0')
>> i++;
>>
>> return i;
>> }
>>
>> static inline ssize_t my_syscall(int fd, const void *buf, size_t count)
>> {
>> register ssize_t arg1 asm ("x0") = fd;
>> register const void *arg2 asm ("x1") = buf;
>> register size_t arg3 asm ("x2") = count;
>>
>> __asm__ volatile (
>> "mov x8, #64\n"
>> "svc #0\n"
>> : "=&r" (arg1)
>> : "r" (arg2), "r" (arg3)
>> : "x8"
>> );
>>
>> return arg1;
>> }
>>
>> void sys_caller(const char *s)
>> {
>> my_syscall(1, s, my_strlen(s));
>> }
>>
>>
>> GCC 8.3.0:
>> ==========
>>
>> main.8.3.0.o: file format elf64-littleaarch64
>>
>>
>> Disassembly of section .text:
>>
>> 0000000000000000 <sys_caller>:
>> 0: 39400001 ldrb w1, [x0]
>> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
>> 8: d2800023 mov x3, #0x1 // #1
>> c: d1000404 sub x4, x0, #0x1
>> 10: 2a0303e2 mov w2, w3
>> 14: 91000463 add x3, x3, #0x1
>> 18: 38636881 ldrb w1, [x4, x3]
>> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
>> 20: aa0003e1 mov x1, x0
>> 24: d2800808 mov x8, #0x40 // #64
>> 28: d4000001 svc #0x0
>> 2c: d65f03c0 ret
>> 30: 52800002 mov w2, #0x0 // #0
>> 34: 17fffffb b 20 <sys_caller+0x20>
>>
>>
>> GCC 8.1.0:
>> ==========
>>
>> main.8.1.0.o: file format elf64-littleaarch64
>>
>>
>> Disassembly of section .text:
>>
>> 0000000000000000 <sys_caller>:
>> 0: 39400001 ldrb w1, [x0]
>> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
>> 8: d2800023 mov x3, #0x1 // #1
>> c: d1000404 sub x4, x0, #0x1
>> 10: 2a0303e2 mov w2, w3
>> 14: 91000463 add x3, x3, #0x1
>> 18: 38636881 ldrb w1, [x4, x3]
>> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
>> 20: aa0003e1 mov x1, x0
>> 24: d2800808 mov x8, #0x40 // #64
>> 28: d4000001 svc #0x0
>> 2c: d65f03c0 ret
>> 30: 52800002 mov w2, #0x0 // #0
>> 34: 17fffffb b 20 <sys_caller+0x20>
>>
>>
>>
>> GCC 6.4.0:
>> ==========
>>
>> main.6.4.0.o: file format elf64-littleaarch64
>>
>>
>> Disassembly of section .text:
>>
>> 0000000000000000 <sys_caller>:
>> 0: 39400001 ldrb w1, [x0]
>> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
>> 8: d2800023 mov x3, #0x1 // #1
>> c: d1000404 sub x4, x0, #0x1
>> 10: 2a0303e2 mov w2, w3
>> 14: 91000463 add x3, x3, #0x1
>> 18: 38636881 ldrb w1, [x4, x3]
>> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
>> 20: aa0003e1 mov x1, x0
>> 24: d2800808 mov x8, #0x40 // #64
>> 28: d4000001 svc #0x0
>> 2c: d65f03c0 ret
>> 30: 52800002 mov w2, #0x0 // #0
>> 34: 17fffffb b 20 <sys_caller+0x20>
>
> Thanks for having a go at this. If the compiler can show the
> problematic behaviour, it looks like your could could probably trigger
> it, and as you observe, it doesn't trigger.
>
> I am sure I have seen it in the past, but today I am struggling
> to tickle the compiler in the right way. My original reproducer may
> have involved LTO, but either way I don't still have it :(
>

The vDSO library is a shared object that is not compiled with LTO as far as I
can see; hence, if this issue involved LTO, it should not be applicable in this case.


>
> The classic example of this (triggered directly and not due to inlining)
> would be something like:
>
> int bar(int, int);
>
> void foo(int x, int y)
> {
> register int x_ asm("r0") = x;
> register int y_ asm("r1") = bar(x, y);
>
> asm volatile (
> "svc #0"
> :: "r" (x_), "r" (y_)
> : "memory"
> );
> }
>
> ->
>
> 0000000000000000 <foo>:
> 0: a9bf7bfd stp x29, x30, [sp, #-16]!
> 4: 910003fd mov x29, sp
> 8: 94000000 bl 0 <bar>
> c: 2a0003e1 mov w1, w0
> 10: d4000001 svc #0x0
> 14: a8c17bfd ldp x29, x30, [sp], #16
> 18: d65f03c0 ret
>

Contextualized to what my vdso fallback functions do, this should not be a
concern because in no case is a function result directly assigned to a variable
declared as register.

Since the vdso fallback functions serve a very specific and limited purpose, I
do not expect that that code is going to change much in future.

The only thing that can happen is something similar to what I wrote in my
example, which as I empirically proved does not trigger the problematic behavior.

>
> The gcc documentation is vague and ambiguous about precisely whan this
> can happen and about how to avoid it.
>

On this I agree, it is not very clear, but this seems more something to raise
with the gcc folks in order to have a more "explicit" description that leaves no
room to the interpretation.

...

>
> However, the workaround is cheap, and to avoid the chance of subtle
> intermittent code gen bugs it may be worth it:
>
> void foo(int x, int y)
> {
> asm volatile (
> "mov x0, %0\n\t"
> "mov x1, %1\n\t"
> "svc #0"
> :: "r" (x), "r" (bar(x, y))
> : "r0", "r1", "memory"
> );
> }
>
> ->
>
> 0000000000000000 <foo>:
> 0: a9be7bfd stp x29, x30, [sp, #-32]!
> 4: 910003fd mov x29, sp
> 8: f9000bf3 str x19, [sp, #16]
> c: 2a0003f3 mov w19, w0
> 10: 94000000 bl 0 <bar>
> 14: 2a0003e2 mov w2, w0
> 18: aa1303e0 mov x0, x19
> 1c: aa0203e1 mov x1, x2
> 20: d4000001 svc #0x0
> 24: f9400bf3 ldr x19, [sp, #16]
> 28: a8c27bfd ldp x29, x30, [sp], #32
> 2c: d65f03c0 ret
>
>
> What do you think?
>

The solution seems ok, thanks for providing it, but IMHO we should only look
for a workaround for something that is actually broken, which, unless I am
missing something major, does not seem to be the case here.

> Cheers
> ---Dave
>

--
Regards,
Vincenzo

2019-06-27 10:04:37

by Dave Martin

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On Wed, Jun 26, 2019 at 08:01:58PM +0100, Vincenzo Frascino wrote:

[...]

> On 6/26/19 5:14 PM, Dave Martin wrote:
> > On Wed, Jun 26, 2019 at 02:27:59PM +0100, Vincenzo Frascino wrote:
> >> Hi Dave,
> >>
> >> On 25/06/2019 16:33, Dave Martin wrote:
> >>> On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote:
> >>>> To take advantage of the commonly defined vdso interface for
> >>>> gettimeofday the architectural code requires an adaptation.
> >>>>
> >>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
> >>>>
> >>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
> >>>> and CLOCK_TAI.
> >>>>
> >>>> Cc: Catalin Marinas <[email protected]>
> >>>> Cc: Will Deacon <[email protected]>
> >>>> Signed-off-by: Vincenzo Frascino <[email protected]>
> >>>> Tested-by: Shijith Thotton <[email protected]>
> >>>> Tested-by: Andre Przywara <[email protected]>
> >>>
> >>> [...]
> >>>
> >>>> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
> >>>> new file mode 100644
> >>>> index 000000000000..bc3cb6738051
> >>>> --- /dev/null
> >>>> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
> >>>> @@ -0,0 +1,86 @@
> >>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>> +/*
> >>>> + * Copyright (C) 2018 ARM Limited
> >>>> + */
> >>>> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
> >>>> +#define __ASM_VDSO_GETTIMEOFDAY_H
> >>>> +
> >>>> +#ifndef __ASSEMBLY__
> >>>> +
> >>>> +#include <asm/unistd.h>
> >>>> +#include <uapi/linux/time.h>
> >>>> +
> >>>> +#define VDSO_HAS_CLOCK_GETRES 1
> >>>> +
> >>>> +static __always_inline int gettimeofday_fallback(
> >>>> + struct __kernel_old_timeval *_tv,
> >>>> + struct timezone *_tz)
> >>>
> >>> Out of interest, does this need to be __always_inline?
> >>>
> >>
> >> It is a design choice. Philosophically, I prefer to control and reduce the scope
> >> of the decisions the compiler has to make in order to not have surprises.
> >>
> >>>> +{
> >>>> + register struct timezone *tz asm("x1") = _tz;
> >>>> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
> >>>> + register long ret asm ("x0");
> >>>> + register long nr asm("x8") = __NR_gettimeofday;
> >>>> +
> >>>> + asm volatile(
> >>>> + " svc #0\n"
> >>>
> >>> Can inlining of this function result in non-trivial expressions being
> >>> substituted for _tz or _tv?
> >>>
> >>> A function call can clobber register asm vars that are assigned to the
> >>> caller-save registers or that the PCS uses for function arguments, and
> >>> the situations where this can happen are poorly defined AFAICT. There's
> >>> also no reliable way to detect at build time whether the compiler has
> >>> done this, and no robust way to stop if happening.
> >>>
> >>> (IMHO the compiler is wrong to do this, but it's been that way for ever,
> >>> and I think I saw GCC 9 show this behaviour recently when I was
> >>> investigating something related.)
> >>>
> >>>
> >>> To be safe, it's better to put this out of line, or remove the reg asm()
> >>> specifiers, mark x0-x18 and lr as clobbered here (so that the compiler
> >>> doesn't map arguments to them), and put movs in the asm to move things
> >>> into the right registers. The syscall number can be passed with an "i"
> >>> constraint. (And yes, this sucks.)
> >>>
> >>> If the code this is inlined in is simple enough though, we can be fairly
> >>> confident of getting away with it.
> >>>
> >>
> >> I took very seriously what you are mentioning here because I think
> >> that robustness of the code comes before than everything especially
> >> in the kernel and I carried on some experiments to try to verify if
> >> in this case is safe to assume that the compiler is doing the right
> >> thing.
> >>
> >> Based on my investigation and on previous observations of the
> >> generation of the vDSO library, I can conclude that the approach
> >> seems safe due to the fact that the usage of this code is very
> >> limited, the code itself is simple enough and that gcc would inline
> >> this code anyway based on the current compilation options.
> >
> > I'd caution about "seems safe". A lot of subtly wrong code not only
> > seems safe, but _is_ safe in its original context, in practice. Add
> > some code to the vdso over time though, or tweak the compilation options
> > at some point in the future, or use a different compiler, and things
> > could still go wrong.
> >
> > (Further comments below.)
> >
>
> Allow me to provide a clarification on "seems safe" vs "is safe": my approach
> "seems safe" because I am providing empirical evidence to support my thesis, but
> I guess we both know that there is no simple way to prove in one way or another
> that the problem has a complete solution.
> The proposed problem involves suppositions on potential future code additions
> and changes of behavior of the compiler that I can't either control or prevent.
> In other words, I can comment and propose solutions only based on the current
> status of the things, and it is what my analysis targets, not on what will
> happen in future.
>
> I will reply point by point below.
>
> >> The experiment that I did was to define some self-contained code that
> >> tries to mimic what you are describing and compile it with 3
> >> different versions of gcc (6.4, 8.1 and 8.3) and in all the tree
> >> cases the behavior seems correct.
> >>
> >> Code:
> >> =====
> >>
> >> typedef int ssize_t;
> >> typedef int size_t;
> >>
> >> static int my_strlen(const char *s)
> >> {
> >> int i = 0;
> >>
> >> while (s[i] == '\0')
> >> i++;
> >>
> >> return i;
> >> }
> >>
> >> static inline ssize_t my_syscall(int fd, const void *buf, size_t count)
> >> {
> >> register ssize_t arg1 asm ("x0") = fd;
> >> register const void *arg2 asm ("x1") = buf;
> >> register size_t arg3 asm ("x2") = count;
> >>
> >> __asm__ volatile (
> >> "mov x8, #64\n"
> >> "svc #0\n"
> >> : "=&r" (arg1)
> >> : "r" (arg2), "r" (arg3)
> >> : "x8"
> >> );
> >>
> >> return arg1;
> >> }
> >>
> >> void sys_caller(const char *s)
> >> {
> >> my_syscall(1, s, my_strlen(s));
> >> }
> >>
> >>
> >> GCC 8.3.0:
> >> ==========
> >>
> >> main.8.3.0.o: file format elf64-littleaarch64
> >>
> >>
> >> Disassembly of section .text:
> >>
> >> 0000000000000000 <sys_caller>:
> >> 0: 39400001 ldrb w1, [x0]
> >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
> >> 8: d2800023 mov x3, #0x1 // #1
> >> c: d1000404 sub x4, x0, #0x1
> >> 10: 2a0303e2 mov w2, w3
> >> 14: 91000463 add x3, x3, #0x1
> >> 18: 38636881 ldrb w1, [x4, x3]
> >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
> >> 20: aa0003e1 mov x1, x0
> >> 24: d2800808 mov x8, #0x40 // #64
> >> 28: d4000001 svc #0x0
> >> 2c: d65f03c0 ret
> >> 30: 52800002 mov w2, #0x0 // #0
> >> 34: 17fffffb b 20 <sys_caller+0x20>
> >>
> >>
> >> GCC 8.1.0:
> >> ==========
> >>
> >> main.8.1.0.o: file format elf64-littleaarch64
> >>
> >>
> >> Disassembly of section .text:
> >>
> >> 0000000000000000 <sys_caller>:
> >> 0: 39400001 ldrb w1, [x0]
> >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
> >> 8: d2800023 mov x3, #0x1 // #1
> >> c: d1000404 sub x4, x0, #0x1
> >> 10: 2a0303e2 mov w2, w3
> >> 14: 91000463 add x3, x3, #0x1
> >> 18: 38636881 ldrb w1, [x4, x3]
> >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
> >> 20: aa0003e1 mov x1, x0
> >> 24: d2800808 mov x8, #0x40 // #64
> >> 28: d4000001 svc #0x0
> >> 2c: d65f03c0 ret
> >> 30: 52800002 mov w2, #0x0 // #0
> >> 34: 17fffffb b 20 <sys_caller+0x20>
> >>
> >>
> >>
> >> GCC 6.4.0:
> >> ==========
> >>
> >> main.6.4.0.o: file format elf64-littleaarch64
> >>
> >>
> >> Disassembly of section .text:
> >>
> >> 0000000000000000 <sys_caller>:
> >> 0: 39400001 ldrb w1, [x0]
> >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30>
> >> 8: d2800023 mov x3, #0x1 // #1
> >> c: d1000404 sub x4, x0, #0x1
> >> 10: 2a0303e2 mov w2, w3
> >> 14: 91000463 add x3, x3, #0x1
> >> 18: 38636881 ldrb w1, [x4, x3]
> >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10>
> >> 20: aa0003e1 mov x1, x0
> >> 24: d2800808 mov x8, #0x40 // #64
> >> 28: d4000001 svc #0x0
> >> 2c: d65f03c0 ret
> >> 30: 52800002 mov w2, #0x0 // #0
> >> 34: 17fffffb b 20 <sys_caller+0x20>
> >
> > Thanks for having a go at this. If the compiler can show the
> > problematic behaviour, it looks like your could could probably trigger
> > it, and as you observe, it doesn't trigger.
> >
> > I am sure I have seen it in the past, but today I am struggling
> > to tickle the compiler in the right way. My original reproducer may
> > have involved LTO, but either way I don't still have it :(
> >
>
> vDSO library is a shared object not compiled with LTO as far as I can
> see, hence if this involved LTO should not applicable in this case.

That turned out to be a spurious hypothesis on my part -- LTO isn't the
smoking gun. (See below.)

> > The classic example of this (triggered directly and not due to inlining)
> > would be something like:
> >
> > int bar(int, int);
> >
> > void foo(int x, int y)
> > {
> > register int x_ asm("r0") = x;
> > register int y_ asm("r1") = bar(x, y);
> >
> > asm volatile (
> > "svc #0"
> > :: "r" (x_), "r" (y_)
> > : "memory"
> > );
> > }
> >
> > ->
> >
> > 0000000000000000 <foo>:
> > 0: a9bf7bfd stp x29, x30, [sp, #-16]!
> > 4: 910003fd mov x29, sp
> > 8: 94000000 bl 0 <bar>
> > c: 2a0003e1 mov w1, w0
> > 10: d4000001 svc #0x0
> > 14: a8c17bfd ldp x29, x30, [sp], #16
> > 18: d65f03c0 ret
> >
>
> Contextualized to what my vdso fallback functions do, this should not be a
> concern because in no case a function result is directly set to a variable
> declared as register.
>
> Since the vdso fallback functions serve a very specific and limited purpose, I
> do not expect that that code is going to change much in future.
>
> The only thing that can happen is something similar to what I wrote in my
> example, which as I empirically proved does not trigger the problematic behavior.
>
> >
> > The gcc documentation is vague and ambiguous about precisely whan this
> > can happen and about how to avoid it.
> >
>
> On this I agree, it is not very clear, but this seems more something to raise
> with the gcc folks in order to have a more "explicit" description that leaves no
> room to the interpretation.
>
> ...
>
> >
> > However, the workaround is cheap, and to avoid the chance of subtle
> > intermittent code gen bugs it may be worth it:
> >
> > void foo(int x, int y)
> > {
> > asm volatile (
> > "mov x0, %0\n\t"
> > "mov x1, %1\n\t"
> > "svc #0"
> > :: "r" (x), "r" (bar(x, y))
> > : "r0", "r1", "memory"
> > );
> > }
> >
> > ->
> >
> > 0000000000000000 <foo>:
> > 0: a9be7bfd stp x29, x30, [sp, #-32]!
> > 4: 910003fd mov x29, sp
> > 8: f9000bf3 str x19, [sp, #16]
> > c: 2a0003f3 mov w19, w0
> > 10: 94000000 bl 0 <bar>
> > 14: 2a0003e2 mov w2, w0
> > 18: aa1303e0 mov x0, x19
> > 1c: aa0203e1 mov x1, x2
> > 20: d4000001 svc #0x0
> > 24: f9400bf3 ldr x19, [sp, #16]
> > 28: a8c27bfd ldp x29, x30, [sp], #32
> > 2c: d65f03c0 ret
> >
> >
> > What do you think?
> >
>
> The solution seems ok, thanks for providing it, but IMHO I think we
> should find a workaround for something that is broken, which, unless
> I am missing something major, this seems not the case.

So, after a bit of further experimentation, I found that I could trigger
it with implicit function calls on an older compiler. I couldn't show
it with explicit function calls (as in your example).

With the following code, inlining of an expression that causes an
implicit call to a libgcc helper can trigger this issue, but I had to
try an older compiler:

int foo(int x, int y)
{
register int res asm("r0");
register const int x_ asm("r0") = x;
register const int y_ asm("r1") = y;

asm volatile (
"svc #0"
: "=r" (res)
: "r" (x_), "r" (y_)
: "memory"
);

return res;
}

int bar(int x, int y)
{
return foo(x, x / y);
}

-> (arm-linux-gnueabihf-gcc 9.1 -O2)

00000000 <foo>:
0: df00 svc 0
2: 4770 bx lr

00000004 <bar>:
4: b510 push {r4, lr}
6: 4604 mov r4, r0
8: f7ff fffe bl 0 <__aeabi_idiv>
c: 4601 mov r1, r0
e: 4620 mov r0, r4
10: df00 svc 0
12: bd10 pop {r4, pc}

-> (arm-linux-gnueabihf-gcc 5.1 -O2)

00000000 <foo>:
0: df00 svc 0
2: 4770 bx lr

00000004 <bar>:
4: b508 push {r3, lr}
6: f7ff fffe bl 0 <__aeabi_idiv>
a: 4601 mov r1, r0
c: df00 svc 0
e: bd08 pop {r3, pc}

I was struggling to find a way to emit an implicit function call for
AArch64, except for 128-bit divide, which would complicate things since
uint128_t doesn't fit in a single register anyway.

Maybe this was considered a bug and fixed sometime after GCC 5, but I
think the GCC documentation is still quite unclear on the semantics of
register asm vars that alias call-clobbered registers in the PCS.

If we can get a promise out of the GCC folks that this will not happen
with any future compiler, then maybe we could just require a new enough
compiler to be used.

Then of course there is clang.

Cheers
---Dave

2019-06-27 10:57:11

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Dave,

Overall, I want to thank you for bringing out the topic. It helped me to
question some decisions and make sure that we have no holes left in the approach.

[...]

>>
>> vDSO library is a shared object not compiled with LTO as far as I can
>> see, hence if this involved LTO should not applicable in this case.
>
> That turned to be a spurious hypothesis on my part -- LTO isn't the
> smoking gun. (See below.)
>

Ok.

>>> The classic example of this (triggered directly and not due to inlining)
>>> would be something like:
>>>
>>> int bar(int, int);
>>>
>>> void foo(int x, int y)
>>> {
>>> register int x_ asm("r0") = x;
>>> register int y_ asm("r1") = bar(x, y);
>>>
>>> asm volatile (
>>> "svc #0"
>>> :: "r" (x_), "r" (y_)
>>> : "memory"
>>> );
>>> }
>>>
>>> ->
>>>
>>> 0000000000000000 <foo>:
>>> 0: a9bf7bfd stp x29, x30, [sp, #-16]!
>>> 4: 910003fd mov x29, sp
>>> 8: 94000000 bl 0 <bar>
>>> c: 2a0003e1 mov w1, w0
>>> 10: d4000001 svc #0x0
>>> 14: a8c17bfd ldp x29, x30, [sp], #16
>>> 18: d65f03c0 ret
>>>
>>
>> Contextualized to what my vdso fallback functions do, this should not be a
>> concern because in no case a function result is directly set to a variable
>> declared as register.
>>
>> Since the vdso fallback functions serve a very specific and limited purpose, I
>> do not expect that that code is going to change much in future.
>>
>> The only thing that can happen is something similar to what I wrote in my
>> example, which as I empirically proved does not trigger the problematic behavior.
>>
>>>
>>> The gcc documentation is vague and ambiguous about precisely whan this
>>> can happen and about how to avoid it.
>>>
>>
>> On this I agree, it is not very clear, but this seems more something to raise
>> with the gcc folks in order to have a more "explicit" description that leaves no
>> room to the interpretation.
>>
>> ...
>>
>>>
>>> However, the workaround is cheap, and to avoid the chance of subtle
>>> intermittent code gen bugs it may be worth it:
>>>
>>> void foo(int x, int y)
>>> {
>>> asm volatile (
>>> "mov x0, %0\n\t"
>>> "mov x1, %1\n\t"
>>> "svc #0"
>>> :: "r" (x), "r" (bar(x, y))
>>> : "r0", "r1", "memory"
>>> );
>>> }
>>>
>>> ->
>>>
>>> 0000000000000000 <foo>:
>>> 0: a9be7bfd stp x29, x30, [sp, #-32]!
>>> 4: 910003fd mov x29, sp
>>> 8: f9000bf3 str x19, [sp, #16]
>>> c: 2a0003f3 mov w19, w0
>>> 10: 94000000 bl 0 <bar>
>>> 14: 2a0003e2 mov w2, w0
>>> 18: aa1303e0 mov x0, x19
>>> 1c: aa0203e1 mov x1, x2
>>> 20: d4000001 svc #0x0
>>> 24: f9400bf3 ldr x19, [sp, #16]
>>> 28: a8c27bfd ldp x29, x30, [sp], #32
>>> 2c: d65f03c0 ret
>>>
>>>
>>> What do you think?
>>>
>>
>> The solution seems ok, thanks for providing it, but IMHO I think we
>> should find a workaround for something that is broken, which, unless
>> I am missing something major, this seems not the case.
>
> So, after a bit of further experimentation, I found that I could trigger
> it with implicit function calls on an older compiler. I couldn't show
> it with explicit function calls (as in your example).
>
> With the following code, inlining if an expression that causes an
> implicit call to a libgcc helper can trigger this issue, but I had to
> try an older compiler:
>
> int foo(int x, int y)
> {
> register int res asm("r0");
> register const int x_ asm("r0") = x;
> register const int y_ asm("r1") = y;
>
> asm volatile (
> "svc #0"
> : "=r" (res)
> : "r" (x_), "r" (y_)
> : "memory"
> );
>
> return res;
> }
>
> int bar(int x, int y)
> {
> return foo(x, x / y);
> }
>
> -> (arm-linux-gnueabihf-gcc 9.1 -O2)
>
> 00000000 <foo>:
> 0: df00 svc 0
> 2: 4770 bx lr
>
> 00000004 <bar>:
> 4: b510 push {r4, lr}
> 6: 4604 mov r4, r0
> 8: f7ff fffe bl 0 <__aeabi_idiv>
> c: 4601 mov r1, r0
> e: 4620 mov r0, r4
> 10: df00 svc 0
> 12: bd10 pop {r4, pc}
>
> -> (arm-linux-gnueabihf-gcc 5.1 -O2)
>
> 00000000 <foo>:
> 0: df00 svc 0
> 2: 4770 bx lr
>
> 00000004 <bar>:
> 4: b508 push {r3, lr}
> 6: f7ff fffe bl 0 <__aeabi_idiv>
> a: 4601 mov r1, r0
> c: df00 svc 0
> e: bd08 pop {r3, pc}
>

Thanks for reporting this. I had a go with gcc-5.1 on the vDSO library and it
seems OK, but it was worth trying.

For obvious reasons I am not reporting the objdump here :)

> I was struggling to find a way to emit an implicit function call for
> AArch64, except for 128-bit divide, which would complicate things since
> uint128_t doesn't fit in a single register anyway.
>
> Maybe this was considered a bug and fixed sometime after GCC 5, but I
> think the GCC documentation is still quite unclear on the semantics of
> register asm vars that alias call-clobbered registers in the PCS.
>
> If we can get a promise out of the GCC folks that this will not happen
> with any future compiler, then maybe we could just require a new enough
> compiler to be used.
>

On this I fully agree, the compiler should never change an "expected" behavior.

If the issue comes from a gray area in the documentation, we have to address it
and have it fixed there.

The minimum version of the compiler from linux-4.19 is 4.6, hence I had to
verify that the vDSO lib does not break with 5.1 [1].

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cafa0010cd51fb711fdcb50fc55f394c5f167a0a

> Then of course there is clang.
>

I could not help myself and I tried clang.8 and clang.7 as well with my example,
just to make sure that we are fine even in that case. Please find below the
results (pretty identical).

main.clang.7.o: file format ELF64-aarch64-little

Disassembly of section .text:
0000000000000000 show_it:
0: e8 03 1f aa mov x8, xzr
4: 09 68 68 38 ldrb w9, [x0, x8]
8: 08 05 00 91 add x8, x8, #1
c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
10: 02 05 00 51 sub w2, w8, #1
14: e1 03 00 aa mov x1, x0
18: 08 08 80 d2 mov x8, #64
1c: 01 00 00 d4 svc #0
20: c0 03 5f d6 ret

main.clang.8.o: file format ELF64-aarch64-little

Disassembly of section .text:
0000000000000000 show_it:
0: e8 03 1f aa mov x8, xzr
4: 09 68 68 38 ldrb w9, [x0, x8]
8: 08 05 00 91 add x8, x8, #1
c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
10: 02 05 00 51 sub w2, w8, #1
14: e1 03 00 aa mov x1, x0
18: 08 08 80 d2 mov x8, #64
1c: 01 00 00 d4 svc #0
20: c0 03 5f d6 ret

Commands used:

$ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o
$ llvm-objdump -d main.clang.<x>.o

> Cheers
> ---Dave
>

--
Regards,
Vincenzo

2019-06-27 11:29:10

by Dave Martin

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote:
> Hi Dave,
>
> Overall, I want to thank you for bringing out the topic. It helped me to
> question some decisions and make sure that we have no holes left in
> the approach.

Fair enough.

This is really just a nasty compiler corner-case... the validity of the
overall approach isn't affected.

> >>
> >> vDSO library is a shared object not compiled with LTO as far as I can
> >> see, hence if this involved LTO should not applicable in this case.
> >
> > That turned to be a spurious hypothesis on my part -- LTO isn't the
> > smoking gun. (See below.)
> >
>
> Ok.
>
> >>> The classic example of this (triggered directly and not due to inlining)
> >>> would be something like:
> >>>
> >>> int bar(int, int);
> >>>
> >>> void foo(int x, int y)
> >>> {
> >>> register int x_ asm("r0") = x;
> >>> register int y_ asm("r1") = bar(x, y);
> >>>
> >>> asm volatile (
> >>> "svc #0"
> >>> :: "r" (x_), "r" (y_)
> >>> : "memory"
> >>> );
> >>> }
> >>>
> >>> ->
> >>>
> >>> 0000000000000000 <foo>:
> >>> 0: a9bf7bfd stp x29, x30, [sp, #-16]!
> >>> 4: 910003fd mov x29, sp
> >>> 8: 94000000 bl 0 <bar>
> >>> c: 2a0003e1 mov w1, w0
> >>> 10: d4000001 svc #0x0
> >>> 14: a8c17bfd ldp x29, x30, [sp], #16
> >>> 18: d65f03c0 ret
> >>>
> >>
> >> Contextualized to what my vdso fallback functions do, this should not be a
> >> concern because in no case a function result is directly set to a variable
> >> declared as register.
> >>
> >> Since the vdso fallback functions serve a very specific and limited purpose, I
> >> do not expect that that code is going to change much in future.
> >>
> >> The only thing that can happen is something similar to what I wrote in my
> >> example, which as I empirically proved does not trigger the problematic behavior.
> >>
> >>>
> >>> The gcc documentation is vague and ambiguous about precisely whan this
> >>> can happen and about how to avoid it.
> >>>
> >>
> >> On this I agree, it is not very clear, but this seems more something to raise
> >> with the gcc folks in order to have a more "explicit" description that leaves no
> >> room to the interpretation.
> >>
> >> ...
> >>
> >>>
> >>> However, the workaround is cheap, and to avoid the chance of subtle
> >>> intermittent code gen bugs it may be worth it:
> >>>
> >>> void foo(int x, int y)
> >>> {
> >>> asm volatile (
> >>> "mov x0, %0\n\t"
> >>> "mov x1, %1\n\t"
> >>> "svc #0"
> >>> :: "r" (x), "r" (bar(x, y))
> >>> : "r0", "r1", "memory"
> >>> );
> >>> }
> >>>
> >>> ->
> >>>
> >>> 0000000000000000 <foo>:
> >>> 0: a9be7bfd stp x29, x30, [sp, #-32]!
> >>> 4: 910003fd mov x29, sp
> >>> 8: f9000bf3 str x19, [sp, #16]
> >>> c: 2a0003f3 mov w19, w0
> >>> 10: 94000000 bl 0 <bar>
> >>> 14: 2a0003e2 mov w2, w0
> >>> 18: aa1303e0 mov x0, x19
> >>> 1c: aa0203e1 mov x1, x2
> >>> 20: d4000001 svc #0x0
> >>> 24: f9400bf3 ldr x19, [sp, #16]
> >>> 28: a8c27bfd ldp x29, x30, [sp], #32
> >>> 2c: d65f03c0 ret
> >>>
> >>>
> >>> What do you think?
> >>>
> >>
> >> The solution seems ok, thanks for providing it, but IMHO I think we
> >> should find a workaround for something that is broken, which, unless
> >> I am missing something major, this seems not the case.
> >
> > So, after a bit of further experimentation, I found that I could trigger
> > it with implicit function calls on an older compiler. I couldn't show
> > it with explicit function calls (as in your example).
> >
> > With the following code, inlining if an expression that causes an
> > implicit call to a libgcc helper can trigger this issue, but I had to
> > try an older compiler:
> >
> > int foo(int x, int y)
> > {
> > register int res asm("r0");
> > register const int x_ asm("r0") = x;
> > register const int y_ asm("r1") = y;
> >
> > asm volatile (
> > "svc #0"
> > : "=r" (res)
> > : "r" (x_), "r" (y_)
> > : "memory"
> > );
> >
> > return res;
> > }
> >
> > int bar(int x, int y)
> > {
> > return foo(x, x / y);
> > }
> >
> > -> (arm-linux-gnueabihf-gcc 9.1 -O2)
> >
> > 00000000 <foo>:
> > 0: df00 svc 0
> > 2: 4770 bx lr
> >
> > 00000004 <bar>:
> > 4: b510 push {r4, lr}
> > 6: 4604 mov r4, r0
> > 8: f7ff fffe bl 0 <__aeabi_idiv>
> > c: 4601 mov r1, r0
> > e: 4620 mov r0, r4
> > 10: df00 svc 0
> > 12: bd10 pop {r4, pc}
> >
> > -> (arm-linux-gnueabihf-gcc 5.1 -O2)
> >
> > 00000000 <foo>:
> > 0: df00 svc 0
> > 2: 4770 bx lr
> >
> > 00000004 <bar>:
> > 4: b508 push {r3, lr}
> > 6: f7ff fffe bl 0 <__aeabi_idiv>
> > a: 4601 mov r1, r0
> > c: df00 svc 0
> > e: bd08 pop {r3, pc}
> >
>
> Thanks for reporting this. I had a go with gcc-5.1 on the vDSO library and seems
> Ok, but it was worth trying.
>
> For obvious reasons I am not reporting the objdump here :)
>
> > I was struggling to find a way to emit an implicit function call for
> > AArch64, except for 128-bit divide, which would complicate things since
> > uint128_t doesn't fit in a single register anyway.
> >
> > Maybe this was considered a bug and fixed sometime after GCC 5, but I
> > think the GCC documentation is still quite unclear on the semantics of
> > register asm vars that alias call-clobbered registers in the PCS.
> >
> > If we can get a promise out of the GCC folks that this will not happen
> > with any future compiler, then maybe we could just require a new enough
> > compiler to be used.
> >
>
> On this I fully agree, the compiler should never change an "expected" behavior.
>
> If the issue comes from a gray area in the documentation, we have to address it
> and have it fixed there.
>
> The minimum version of the compiler from linux-4.19 is 4.6, hence I had to try
> that the vDSO lib does not break with 5.1 [1].
>
> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cafa0010cd51fb711fdcb50fc55f394c5f167a0a

OK

> > Then of course there is clang.
> >
>
> I could not help myself and I tried clang.8 and clang.7 as well with my example,
> just to make sure that we are fine even in that case. Please find below the
> results (pretty identical).
>
> main.clang.7.o: file format ELF64-aarch64-little
>
> Disassembly of section .text:
> 0000000000000000 show_it:
> 0: e8 03 1f aa mov x8, xzr
> 4: 09 68 68 38 ldrb w9, [x0, x8]
> 8: 08 05 00 91 add x8, x8, #1
> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
> 10: 02 05 00 51 sub w2, w8, #1
> 14: e1 03 00 aa mov x1, x0
> 18: 08 08 80 d2 mov x8, #64
> 1c: 01 00 00 d4 svc #0
> 20: c0 03 5f d6 ret
>
> main.clang.8.o: file format ELF64-aarch64-little
>
> Disassembly of section .text:
> 0000000000000000 show_it:
> 0: e8 03 1f aa mov x8, xzr
> 4: 09 68 68 38 ldrb w9, [x0, x8]
> 8: 08 05 00 91 add x8, x8, #1
> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
> 10: 02 05 00 51 sub w2, w8, #1
> 14: e1 03 00 aa mov x1, x0
> 18: 08 08 80 d2 mov x8, #64
> 1c: 01 00 00 d4 svc #0
> 20: c0 03 5f d6 ret
>
> Commands used:
>
> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o
> $ llvm-objdump -d main.clang.<x>.o

Actually, I'm not sure this is comparable with the reproducer I quoted
in my last reply.

The compiler can see the definition of strlen and fully inlines it.
I only ever saw the problem when the compiler emits an out-of-line
implicit function call.

What does clang do with my example on 32-bit?

Cheers
---Dave

2019-06-27 11:59:05

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On 6/27/19 12:27 PM, Dave Martin wrote:
> On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote:
>> Hi Dave,
>>
>> Overall, I want to thank you for bringing out the topic. It helped me to
>> question some decisions and make sure that we have no holes left in
>> the approach.
>
> Fair enough.
>
> This is really just a nasty compiler corner-case... the validity of the
> overall approach isn't affected.
>
>>>>
>>>> vDSO library is a shared object not compiled with LTO as far as I can
>>>> see, hence if this involved LTO should not applicable in this case.
>>>
>>> That turned to be a spurious hypothesis on my part -- LTO isn't the
>>> smoking gun. (See below.)
>>>
>>
>> Ok.
>>
>>>>> The classic example of this (triggered directly and not due to inlining)
>>>>> would be something like:
>>>>>
>>>>> int bar(int, int);
>>>>>
>>>>> void foo(int x, int y)
>>>>> {
>>>>> register int x_ asm("r0") = x;
>>>>> register int y_ asm("r1") = bar(x, y);
>>>>>
>>>>> asm volatile (
>>>>> "svc #0"
>>>>> :: "r" (x_), "r" (y_)
>>>>> : "memory"
>>>>> );
>>>>> }
>>>>>
>>>>> ->
>>>>>
>>>>> 0000000000000000 <foo>:
>>>>> 0: a9bf7bfd stp x29, x30, [sp, #-16]!
>>>>> 4: 910003fd mov x29, sp
>>>>> 8: 94000000 bl 0 <bar>
>>>>> c: 2a0003e1 mov w1, w0
>>>>> 10: d4000001 svc #0x0
>>>>> 14: a8c17bfd ldp x29, x30, [sp], #16
>>>>> 18: d65f03c0 ret
>>>>>
>>>>
>>>> Contextualized to what my vdso fallback functions do, this should not be a
>>>> concern because in no case a function result is directly set to a variable
>>>> declared as register.
>>>>
>>>> Since the vdso fallback functions serve a very specific and limited purpose, I
>>>> do not expect that that code is going to change much in future.
>>>>
>>>> The only thing that can happen is something similar to what I wrote in my
>>>> example, which as I empirically proved does not trigger the problematic behavior.
>>>>
>>>>>
>>>>> The gcc documentation is vague and ambiguous about precisely whan this
>>>>> can happen and about how to avoid it.
>>>>>
>>>>
>>>> On this I agree, it is not very clear, but this seems more something to raise
>>>> with the gcc folks in order to have a more "explicit" description that leaves no
>>>> room to the interpretation.
>>>>
>>>> ...
>>>>
>>>>>
>>>>> However, the workaround is cheap, and to avoid the chance of subtle
>>>>> intermittent code gen bugs it may be worth it:
>>>>>
>>>>> void foo(int x, int y)
>>>>> {
>>>>> asm volatile (
>>>>> "mov x0, %0\n\t"
>>>>> "mov x1, %1\n\t"
>>>>> "svc #0"
>>>>> :: "r" (x), "r" (bar(x, y))
>>>>> : "r0", "r1", "memory"
>>>>> );
>>>>> }
>>>>>
>>>>> ->
>>>>>
>>>>> 0000000000000000 <foo>:
>>>>> 0: a9be7bfd stp x29, x30, [sp, #-32]!
>>>>> 4: 910003fd mov x29, sp
>>>>> 8: f9000bf3 str x19, [sp, #16]
>>>>> c: 2a0003f3 mov w19, w0
>>>>> 10: 94000000 bl 0 <bar>
>>>>> 14: 2a0003e2 mov w2, w0
>>>>> 18: aa1303e0 mov x0, x19
>>>>> 1c: aa0203e1 mov x1, x2
>>>>> 20: d4000001 svc #0x0
>>>>> 24: f9400bf3 ldr x19, [sp, #16]
>>>>> 28: a8c27bfd ldp x29, x30, [sp], #32
>>>>> 2c: d65f03c0 ret
>>>>>
>>>>>
>>>>> What do you think?
>>>>>
>>>>
>>>> The solution seems ok, thanks for providing it, but IMHO I think we
>>>> should find a workaround for something that is broken, which, unless
>>>> I am missing something major, this seems not the case.
>>>
>>> So, after a bit of further experimentation, I found that I could trigger
>>> it with implicit function calls on an older compiler. I couldn't show
>>> it with explicit function calls (as in your example).
>>>
>>> With the following code, inlining if an expression that causes an
>>> implicit call to a libgcc helper can trigger this issue, but I had to
>>> try an older compiler:
>>>
>>> int foo(int x, int y)
>>> {
>>> register int res asm("r0");
>>> register const int x_ asm("r0") = x;
>>> register const int y_ asm("r1") = y;
>>>
>>> asm volatile (
>>> "svc #0"
>>> : "=r" (res)
>>> : "r" (x_), "r" (y_)
>>> : "memory"
>>> );
>>>
>>> return res;
>>> }
>>>
>>> int bar(int x, int y)
>>> {
>>> return foo(x, x / y);
>>> }
>>>
>>> -> (arm-linux-gnueabihf-gcc 9.1 -O2)
>>>
>>> 00000000 <foo>:
>>> 0: df00 svc 0
>>> 2: 4770 bx lr
>>>
>>> 00000004 <bar>:
>>> 4: b510 push {r4, lr}
>>> 6: 4604 mov r4, r0
>>> 8: f7ff fffe bl 0 <__aeabi_idiv>
>>> c: 4601 mov r1, r0
>>> e: 4620 mov r0, r4
>>> 10: df00 svc 0
>>> 12: bd10 pop {r4, pc}
>>>
>>> -> (arm-linux-gnueabihf-gcc 5.1 -O2)
>>>
>>> 00000000 <foo>:
>>> 0: df00 svc 0
>>> 2: 4770 bx lr
>>>
>>> 00000004 <bar>:
>>> 4: b508 push {r3, lr}
>>> 6: f7ff fffe bl 0 <__aeabi_idiv>
>>> a: 4601 mov r1, r0
>>> c: df00 svc 0
>>> e: bd08 pop {r3, pc}
>>>
>>
>> Thanks for reporting this. I had a go with gcc-5.1 on the vDSO library and seems
>> Ok, but it was worth trying.
>>
>> For obvious reasons I am not reporting the objdump here :)
>>
>>> I was struggling to find a way to emit an implicit function call for
>>> AArch64, except for 128-bit divide, which would complicate things since
>>> uint128_t doesn't fit in a single register anyway.
>>>
>>> Maybe this was considered a bug and fixed sometime after GCC 5, but I
>>> think the GCC documentation is still quite unclear on the semantics of
>>> register asm vars that alias call-clobbered registers in the PCS.
>>>
>>> If we can get a promise out of the GCC folks that this will not happen
>>> with any future compiler, then maybe we could just require a new enough
>>> compiler to be used.
>>>
>>
>> On this I fully agree, the compiler should never change an "expected" behavior.
>>
>> If the issue comes from a gray area in the documentation, we have to address it
>> and have it fixed there.
>>
>> The minimum version of the compiler from linux-4.19 is 4.6, hence I had to try
>> that the vDSO lib does not break with 5.1 [1].
>>
>> [1]
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cafa0010cd51fb711fdcb50fc55f394c5f167a0a
>
> OK
>
>>> Then of course there is clang.
>>>
>>
>> I could not help myself and I tried clang.8 and clang.7 as well with my example,
>> just to make sure that we are fine even in that case. Please find below the
>> results (pretty identical).
>>
>> main.clang.7.o: file format ELF64-aarch64-little
>>
>> Disassembly of section .text:
>> 0000000000000000 show_it:
>> 0: e8 03 1f aa mov x8, xzr
>> 4: 09 68 68 38 ldrb w9, [x0, x8]
>> 8: 08 05 00 91 add x8, x8, #1
>> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
>> 10: 02 05 00 51 sub w2, w8, #1
>> 14: e1 03 00 aa mov x1, x0
>> 18: 08 08 80 d2 mov x8, #64
>> 1c: 01 00 00 d4 svc #0
>> 20: c0 03 5f d6 ret
>>
>> main.clang.8.o: file format ELF64-aarch64-little
>>
>> Disassembly of section .text:
>> 0000000000000000 show_it:
>> 0: e8 03 1f aa mov x8, xzr
>> 4: 09 68 68 38 ldrb w9, [x0, x8]
>> 8: 08 05 00 91 add x8, x8, #1
>> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
>> 10: 02 05 00 51 sub w2, w8, #1
>> 14: e1 03 00 aa mov x1, x0
>> 18: 08 08 80 d2 mov x8, #64
>> 1c: 01 00 00 d4 svc #0
>> 20: c0 03 5f d6 ret
>>
>> Commands used:
>>
>> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o
>> $ llvm-objdump -d main.clang.<x>.o
>
> Actually, I'm not sure this is comparable with the reproducer I quoted
> in my last reply.
>

As explained in my previous email, this is the only case that can realistically
happen. The vDSO has no dependency on any other library (e.g. the libgcc you
were mentioning), and we are referring to the fallbacks, which fall into this
category.

> The compiler can see the definition of strlen and fully inlines it.
> I only ever saw the problem when the compiler emits an out-of-line
> implicit function call.
>
> What does clang do with my example on 32-bit?

When clang is selected, compat vDSOs are currently disabled on arm64; they will
be introduced with a future patch series.

Anyway since I am curious as well, this is what happens with your example with
clang.8 target=arm-linux-gnueabihf:

dave-code.clang.8.o: file format ELF32-arm-little

Disassembly of section .text:
0000000000000000 foo:
0: 00 00 00 ef svc #0
4: 1e ff 2f e1 bx lr

0000000000000008 bar:
8: 10 4c 2d e9 push {r4, r10, r11, lr}
c: 08 b0 8d e2 add r11, sp, #8
10: 00 40 a0 e1 mov r4, r0
14: fe ff ff eb bl #-8 <bar+0xc>
18: 00 10 a0 e1 mov r1, r0
1c: 04 00 a0 e1 mov r0, r4
20: 00 00 00 ef svc #0
24: 10 8c bd e8 pop {r4, r10, r11, pc}

Compiled with -O2, -O3, -Os never inlines.

Same thing happens for aarch64-linux-gnueabi:

dave-code.clang.8.o: file format ELF64-aarch64-little

Disassembly of section .text:
0000000000000000 foo:
0: e0 03 00 2a mov w0, w0
4: e1 03 01 2a mov w1, w1
8: 01 00 00 d4 svc #0
c: c0 03 5f d6 ret

0000000000000010 bar:
10: 01 0c c1 1a sdiv w1, w0, w1
14: e0 03 00 2a mov w0, w0
18: 01 00 00 d4 svc #0
1c: c0 03 5f d6 ret


Based on this I think we can conclude our investigation.

>
> Cheers
> ---Dave
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>

--
Regards,
Vincenzo

2019-06-27 14:39:02

by Dave Martin

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

On Thu, Jun 27, 2019 at 12:59:07PM +0100, Vincenzo Frascino wrote:
> On 6/27/19 12:27 PM, Dave Martin wrote:
> > On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote:

[...]

> >> Disassembly of section .text:
> >> 0000000000000000 show_it:
> >> 0: e8 03 1f aa mov x8, xzr
> >> 4: 09 68 68 38 ldrb w9, [x0, x8]
> >> 8: 08 05 00 91 add x8, x8, #1
> >> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
> >> 10: 02 05 00 51 sub w2, w8, #1
> >> 14: e1 03 00 aa mov x1, x0
> >> 18: 08 08 80 d2 mov x8, #64
> >> 1c: 01 00 00 d4 svc #0
> >> 20: c0 03 5f d6 ret
> >>
> >> Commands used:
> >>
> >> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o
> >> $ llvm-objdump -d main.clang.<x>.o
> >
> > Actually, I'm not sure this is comparable with the reproducer I quoted
> > in my last reply.
> >
>
> As explained in my previous email, this is the only case that can realistically
> happen. vDSO has no dependency on any other library (i.e. libgcc you were
> mentioning) and we are referring to the fallbacks which fall in this category.

Outlining could also introduce a local function call where none exists
explicitly in the program IIUC.

My point is that the interaction between asm reg vars and machine-level
procedure calls is at best ill-defined, and it is largely up to the
compiler when to introduce such a call, even without LTO etc.

So we should not be surprised to see variations in behaviour depending
on compiler, compiler version and compiler flags.

> > The compiler can see the definition of strlen and fully inlines it.
> > I only ever saw the problem when the compiler emits an out-of-line
> > implicit function call.
> > > What does clang do with my example on 32-bit?
>
> When clang is selected compat vDSOs are currently disabled on arm64, will be
> introduced with a future patch series.
>
> Anyway since I am curious as well, this is what happens with your example with
> clang.8 target=arm-linux-gnueabihf:
>
> dave-code.clang.8.o: file format ELF32-arm-little
>
> Disassembly of section .text:
> 0000000000000000 foo:
> 0: 00 00 00 ef svc #0
> 4: 1e ff 2f e1 bx lr
>
> 0000000000000008 bar:
> 8: 10 4c 2d e9 push {r4, r10, r11, lr}
> c: 08 b0 8d e2 add r11, sp, #8
> 10: 00 40 a0 e1 mov r4, r0
> 14: fe ff ff eb bl #-8 <bar+0xc>
> 18: 00 10 a0 e1 mov r1, r0
> 1c: 04 00 a0 e1 mov r0, r4
> 20: 00 00 00 ef svc #0
> 24: 10 8c bd e8 pop {r4, r10, r11, pc}

> Compiled with -O2, -O3, -Os never inlines.

Looks sane, and is the behaviour we want.

> Same thing happens for aarch64-linux-gnueabi:
>
> dave-code.clang.8.o: file format ELF64-aarch64-little
>
> Disassembly of section .text:
> 0000000000000000 foo:
> 0: e0 03 00 2a mov w0, w0
> 4: e1 03 01 2a mov w1, w1
> 8: 01 00 00 d4 svc #0
> c: c0 03 5f d6 ret
>
> 0000000000000010 bar:
> 10: 01 0c c1 1a sdiv w1, w0, w1
> 14: e0 03 00 2a mov w0, w0
> 18: 01 00 00 d4 svc #0
> 1c: c0 03 5f d6 ret

Curious, clang seems to be inserting some seemingly redundant moves
of its own here, though this shouldn't break anything.

I suspect that clang might require an X-reg holding an int to have its
top 32 bits zeroed for passing to an asm, whereas GCC does not. I think
this comes under "we should not be surprised to see variations".

GCC 9 does this instead:

0000000000000000 <foo>:
0: d4000001 svc #0x0
4: d65f03c0 ret

0000000000000008 <bar>:
8: 1ac10c01 sdiv w1, w0, w1
c: d4000001 svc #0x0
10: d65f03c0 ret


> Based on this I think we can conclude our investigation.

So we use non-reg vars and use the asm clobber list and explicit moves
to get things into / out of the right registers?

Cheers
---Dave

2019-06-27 15:34:25

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Dave,

On 6/27/19 3:38 PM, Dave Martin wrote:
> On Thu, Jun 27, 2019 at 12:59:07PM +0100, Vincenzo Frascino wrote:
>> On 6/27/19 12:27 PM, Dave Martin wrote:
>>> On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote:
>
> [...]
>
>>>> Disassembly of section .text:
>>>> 0000000000000000 show_it:
>>>> 0: e8 03 1f aa mov x8, xzr
>>>> 4: 09 68 68 38 ldrb w9, [x0, x8]
>>>> 8: 08 05 00 91 add x8, x8, #1
>>>> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4>
>>>> 10: 02 05 00 51 sub w2, w8, #1
>>>> 14: e1 03 00 aa mov x1, x0
>>>> 18: 08 08 80 d2 mov x8, #64
>>>> 1c: 01 00 00 d4 svc #0
>>>> 20: c0 03 5f d6 ret
>>>>
>>>> Commands used:
>>>>
>>>> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o
>>>> $ llvm-objdump -d main.clang.<x>.o
>>>
>>> Actually, I'm not sure this is comparable with the reproducer I quoted
>>> in my last reply.
>>>
>>
>> As explained in my previous email, this is the only case that can realistically
>> happen. vDSO has no dependency on any other library (i.e. libgcc you were
>> mentioning) and we are referring to the fallbacks which fall in this category.
>
> Outlining could also introduce a local function call where none exists
> explicitly in the program IIUC.
>
> My point is that the interaction between asm reg vars and machine-level
> procedure calls is at best ill-defined, and it is largely up to the
> compiler when to introduce such a call, even without LTO etc.
>
> So we should not be surprised to see variations in behaviour depending
> on compiler, compiler version and compiler flags.
>

I tested 10 versions of the compiler and, apart from gcc-5.1, which triggers the
issue in a specific case and not in the vdso library, I could not find evidence
of the problem.

>>> The compiler can see the definition of strlen and fully inlines it.
>>> I only ever saw the problem when the compiler emits an out-of-line
>>> implicit function call.
>>>> What does clang do with my example on 32-bit?
>>
>> When clang is selected compat vDSOs are currently disabled on arm64, will be
>> introduced with a future patch series.
>>
>> Anyway since I am curious as well, this is what happens with your example with
>> clang.8 target=arm-linux-gnueabihf:
>>
>> dave-code.clang.8.o: file format ELF32-arm-little
>>
>> Disassembly of section .text:
>> 0000000000000000 foo:
>> 0: 00 00 00 ef svc #0
>> 4: 1e ff 2f e1 bx lr
>>
>> 0000000000000008 bar:
>> 8: 10 4c 2d e9 push {r4, r10, r11, lr}
>> c: 08 b0 8d e2 add r11, sp, #8
>> 10: 00 40 a0 e1 mov r4, r0
>> 14: fe ff ff eb bl #-8 <bar+0xc>
>> 18: 00 10 a0 e1 mov r1, r0
>> 1c: 04 00 a0 e1 mov r0, r4
>> 20: 00 00 00 ef svc #0
>> 24: 10 8c bd e8 pop {r4, r10, r11, pc}
>
>> Compiled with -O2, -O3, -Os never inlines.
>
> Looks sane, and is the behaviour we want.
>
>> Same thing happens for aarch64-linux-gnueabi:
>>
>> dave-code.clang.8.o: file format ELF64-aarch64-little
>>
>> Disassembly of section .text:
>> 0000000000000000 foo:
>> 0: e0 03 00 2a mov w0, w0
>> 4: e1 03 01 2a mov w1, w1
>> 8: 01 00 00 d4 svc #0
>> c: c0 03 5f d6 ret
>>
>> 0000000000000010 bar:
>> 10: 01 0c c1 1a sdiv w1, w0, w1
>> 14: e0 03 00 2a mov w0, w0
>> 18: 01 00 00 d4 svc #0
>> 1c: c0 03 5f d6 ret
>
> Curious, clang seems to be inserting some seemingly redundant moves
> of its own here, though this shouldn't break anything.
>
> I suspect that clang might require an X-reg holding an int to have its
> top 32 bits zeroed for passing to an asm, whereas GCC does not. I think
> this comes under "we should not be surprised to see variations".
>
> GCC 9 does this instead:
>
> 0000000000000000 <foo>:
> 0: d4000001 svc #0x0
> 4: d65f03c0 ret
>
> 0000000000000008 <bar>:
> 8: 1ac10c01 sdiv w1, w0, w1
> c: d4000001 svc #0x0
> 10: d65f03c0 ret
>
>
>> Based on this I think we can conclude our investigation.
>
> So we use non-reg vars and use the asm clobber list and explicit moves
> to get things into / out of the right registers?
>

Since I managed to provide enough evidence, based on the behavior of various
versions of the compilers, that the library as it stands is consistent and does
not suffer any of the issues you reported, I think I will keep my code as is,
at least for this release; I will revisit it in the future if something happens.

If you manage to prove that my library as it stands (no code additions or source
modifications) has the issues you mentioned based on some version of the
compiler, this changes everything.

Happy to hear from you.

> Cheers
> ---Dave
>

--
Regards,
Vincenzo

2019-06-28 13:11:06

by Marek Szyprowski

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Dear All,

On 2019-06-21 11:52, Vincenzo Frascino wrote:
> To take advantage of the commonly defined vdso interface for
> gettimeofday the architectural code requires an adaptation.
>
> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>
> With the new implementation arm64 gains support for CLOCK_BOOTTIME
> and CLOCK_TAI.
>
> Cc: Catalin Marinas <[email protected]>
> Cc: Will Deacon <[email protected]>
> Signed-off-by: Vincenzo Frascino <[email protected]>
> Tested-by: Shijith Thotton <[email protected]>
> Tested-by: Andre Przywara <[email protected]>
> Signed-off-by: Catalin Marinas <[email protected]>

This patch causes serious regression on Samsung Exynos5433 SoC based
TM2(e) boards. The time in userspace is always set to begin of the epoch:

# date 062813152019
Fri Jun 28 13:15:00 UTC 2019
# date
Thu Jan  1 00:00:00 UTC 1970
# date
Thu Jan  1 00:00:00 UTC 1970

I've noticed that since the patch landed in Linux next-20190625 and
bisect indeed pointed to this patch.

> ---
> arch/arm64/Kconfig | 2 +
> arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++
> arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++
> arch/arm64/include/asm/vdso_datapage.h | 48 ---
> arch/arm64/kernel/asm-offsets.c | 33 +-
> arch/arm64/kernel/vdso.c | 51 +---
> arch/arm64/kernel/vdso/Makefile | 34 ++-
> arch/arm64/kernel/vdso/gettimeofday.S | 334 ---------------------
> arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++
> 9 files changed, 223 insertions(+), 446 deletions(-)
> create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h
> create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h
> delete mode 100644 arch/arm64/include/asm/vdso_datapage.h
> delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S
> create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 697ea0510729..952c9f8cf3b8 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -107,6 +107,7 @@ config ARM64
> select GENERIC_STRNCPY_FROM_USER
> select GENERIC_STRNLEN_USER
> select GENERIC_TIME_VSYSCALL
> + select GENERIC_GETTIMEOFDAY
> select HANDLE_DOMAIN_IRQ
> select HARDIRQS_SW_RESEND
> select HAVE_PCI
> @@ -160,6 +161,7 @@ config ARM64
> select HAVE_SYSCALL_TRACEPOINTS
> select HAVE_KPROBES
> select HAVE_KRETPROBES
> + select HAVE_GENERIC_VDSO
> select IOMMU_DMA if IOMMU_SUPPORT
> select IRQ_DOMAIN
> select IRQ_FORCED_THREADING
> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
> new file mode 100644
> index 000000000000..bc3cb6738051
> --- /dev/null
> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
> @@ -0,0 +1,86 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2018 ARM Limited
> + */
> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
> +#define __ASM_VDSO_GETTIMEOFDAY_H
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <asm/unistd.h>
> +#include <uapi/linux/time.h>
> +
> +#define VDSO_HAS_CLOCK_GETRES 1
> +
> +static __always_inline int gettimeofday_fallback(
> + struct __kernel_old_timeval *_tv,
> + struct timezone *_tz)
> +{
> + register struct timezone *tz asm("x1") = _tz;
> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
> + register long ret asm ("x0");
> + register long nr asm("x8") = __NR_gettimeofday;
> +
> + asm volatile(
> + " svc #0\n"
> + : "=r" (ret)
> + : "r" (tv), "r" (tz), "r" (nr)
> + : "memory");
> +
> + return ret;
> +}
> +
> +static __always_inline long clock_gettime_fallback(
> + clockid_t _clkid,
> + struct __kernel_timespec *_ts)
> +{
> + register struct __kernel_timespec *ts asm("x1") = _ts;
> + register clockid_t clkid asm("x0") = _clkid;
> + register long ret asm ("x0");
> + register long nr asm("x8") = __NR_clock_gettime;
> +
> + asm volatile(
> + " svc #0\n"
> + : "=r" (ret)
> + : "r" (clkid), "r" (ts), "r" (nr)
> + : "memory");
> +
> + return ret;
> +}
> +
> +static __always_inline int clock_getres_fallback(
> + clockid_t _clkid,
> + struct __kernel_timespec *_ts)
> +{
> + register struct __kernel_timespec *ts asm("x1") = _ts;
> + register clockid_t clkid asm("x0") = _clkid;
> + register long ret asm ("x0");
> + register long nr asm("x8") = __NR_clock_getres;
> +
> + asm volatile(
> + " svc #0\n"
> + : "=r" (ret)
> + : "r" (clkid), "r" (ts), "r" (nr)
> + : "memory");
> +
> + return ret;
> +}
> +
> +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
> +{
> + u64 res;
> +
> + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
> +
> + return res;
> +}
> +
> +static __always_inline
> +const struct vdso_data *__arch_get_vdso_data(void)
> +{
> + return _vdso_data;
> +}
> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
> new file mode 100644
> index 000000000000..0c731bfc7c8c
> --- /dev/null
> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_VDSO_VSYSCALL_H
> +#define __ASM_VDSO_VSYSCALL_H
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <linux/timekeeper_internal.h>
> +#include <vdso/datapage.h>
> +
> +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
> +
> +extern struct vdso_data *vdso_data;
> +
> +/*
> + * Update the vDSO data page to keep in sync with kernel timekeeping.
> + */
> +static __always_inline
> +struct vdso_data *__arm64_get_k_vdso_data(void)
> +{
> + return vdso_data;
> +}
> +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
> +
> +static __always_inline
> +int __arm64_get_clock_mode(struct timekeeper *tk)
> +{
> + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
> +
> + return use_syscall;
> +}
> +#define __arch_get_clock_mode __arm64_get_clock_mode
> +
> +static __always_inline
> +int __arm64_use_vsyscall(struct vdso_data *vdata)
> +{
> + return !vdata[CS_HRES_COARSE].clock_mode;
> +}
> +#define __arch_use_vsyscall __arm64_use_vsyscall
> +
> +static __always_inline
> +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
> +{
> + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
> + vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
> +}
> +#define __arch_update_vsyscall __arm64_update_vsyscall
> +
> +/* The asm-generic header needs to be included after the definitions above */
> +#include <asm-generic/vdso/vsyscall.h>
> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __ASM_VDSO_VSYSCALL_H */
> diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
> deleted file mode 100644
> index f89263c8e11a..000000000000
> --- a/arch/arm64/include/asm/vdso_datapage.h
> +++ /dev/null
> @@ -1,48 +0,0 @@
> -/*
> - * Copyright (C) 2012 ARM Limited
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> - * GNU General Public License for more details.
> - *
> - * You should have received a copy of the GNU General Public License
> - * along with this program. If not, see <http://www.gnu.org/licenses/>.
> - */
> -#ifndef __ASM_VDSO_DATAPAGE_H
> -#define __ASM_VDSO_DATAPAGE_H
> -
> -#ifdef __KERNEL__
> -
> -#ifndef __ASSEMBLY__
> -
> -struct vdso_data {
> - __u64 cs_cycle_last; /* Timebase at clocksource init */
> - __u64 raw_time_sec; /* Raw time */
> - __u64 raw_time_nsec;
> - __u64 xtime_clock_sec; /* Kernel time */
> - __u64 xtime_clock_nsec;
> - __u64 xtime_coarse_sec; /* Coarse time */
> - __u64 xtime_coarse_nsec;
> - __u64 wtm_clock_sec; /* Wall to monotonic time */
> - __u64 wtm_clock_nsec;
> - __u32 tb_seq_count; /* Timebase sequence counter */
> - /* cs_* members must be adjacent and in this order (ldp accesses) */
> - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
> - __u32 cs_shift; /* Clocksource shift (mono = raw) */
> - __u32 cs_raw_mult; /* Raw clocksource multiplier */
> - __u32 tz_minuteswest; /* Whacky timezone stuff */
> - __u32 tz_dsttime;
> - __u32 use_syscall;
> - __u32 hrtimer_res;
> -};
> -
> -#endif /* !__ASSEMBLY__ */
> -
> -#endif /* __KERNEL__ */
> -
> -#endif /* __ASM_VDSO_DATAPAGE_H */
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 947e39896e28..9e4b7ccbab2f 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -25,13 +25,13 @@
> #include <linux/kvm_host.h>
> #include <linux/preempt.h>
> #include <linux/suspend.h>
> +#include <vdso/datapage.h>
> #include <asm/cpufeature.h>
> #include <asm/fixmap.h>
> #include <asm/thread_info.h>
> #include <asm/memory.h>
> #include <asm/smp_plat.h>
> #include <asm/suspend.h>
> -#include <asm/vdso_datapage.h>
> #include <linux/kbuild.h>
> #include <linux/arm-smccc.h>
>
> @@ -100,17 +100,28 @@ int main(void)
> DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
> DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
> BLANK();
> - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
> - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
> - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
> - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
> - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
> - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
> - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
> - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
> - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
> + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
> + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
> + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
> + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
> + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
> + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
> + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
> + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
> + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
> + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
> + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
> + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
> + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
> + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
> + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
> + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
> + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
> + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
> + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
> + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
> DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
> - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
> + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
> BLANK();
> DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
> DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
> index 8074cbd3a3a8..23c38303a52a 100644
> --- a/arch/arm64/kernel/vdso.c
> +++ b/arch/arm64/kernel/vdso.c
> @@ -31,11 +31,13 @@
> #include <linux/slab.h>
> #include <linux/timekeeper_internal.h>
> #include <linux/vmalloc.h>
> +#include <vdso/datapage.h>
> +#include <vdso/helpers.h>
> +#include <vdso/vsyscall.h>
>
> #include <asm/cacheflush.h>
> #include <asm/signal32.h>
> #include <asm/vdso.h>
> -#include <asm/vdso_datapage.h>
>
> extern char vdso_start[], vdso_end[];
> static unsigned long vdso_pages __ro_after_init;
> @@ -44,10 +46,10 @@ static unsigned long vdso_pages __ro_after_init;
> * The vDSO data page.
> */
> static union {
> - struct vdso_data data;
> + struct vdso_data data[CS_BASES];
> u8 page[PAGE_SIZE];
> } vdso_data_store __page_aligned_data;
> -struct vdso_data *vdso_data = &vdso_data_store.data;
> +struct vdso_data *vdso_data = vdso_data_store.data;
>
> #ifdef CONFIG_COMPAT
> /*
> @@ -280,46 +282,3 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
> up_write(&mm->mmap_sem);
> return PTR_ERR(ret);
> }
> -
> -/*
> - * Update the vDSO data page to keep in sync with kernel timekeeping.
> - */
> -void update_vsyscall(struct timekeeper *tk)
> -{
> - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
> -
> - ++vdso_data->tb_seq_count;
> - smp_wmb();
> -
> - vdso_data->use_syscall = use_syscall;
> - vdso_data->xtime_coarse_sec = tk->xtime_sec;
> - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
> - tk->tkr_mono.shift;
> - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
> - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
> -
> - /* Read without the seqlock held by clock_getres() */
> - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
> -
> - if (!use_syscall) {
> - /* tkr_mono.cycle_last == tkr_raw.cycle_last */
> - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
> - vdso_data->raw_time_sec = tk->raw_sec;
> - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
> - vdso_data->xtime_clock_sec = tk->xtime_sec;
> - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
> - vdso_data->cs_mono_mult = tk->tkr_mono.mult;
> - vdso_data->cs_raw_mult = tk->tkr_raw.mult;
> - /* tkr_mono.shift == tkr_raw.shift */
> - vdso_data->cs_shift = tk->tkr_mono.shift;
> - }
> -
> - smp_wmb();
> - ++vdso_data->tb_seq_count;
> -}
> -
> -void update_vsyscall_tz(void)
> -{
> - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
> - vdso_data->tz_dsttime = sys_tz.tz_dsttime;
> -}
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index fa230ff09aa1..3acfc813e966 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -6,7 +6,12 @@
> # Heavily based on the vDSO Makefiles for other archs.
> #
>
> -obj-vdso := gettimeofday.o note.o sigreturn.o
> +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
> +# the inclusion of generic Makefile.
> +ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
> +include $(srctree)/lib/vdso/Makefile
> +
> +obj-vdso := vgettimeofday.o note.o sigreturn.o
>
> # Build rules
> targets := $(obj-vdso) vdso.so vdso.so.dbg
> @@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
> ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
> --build-id -n -T
>
> +ccflags-y := -fno-common -fno-builtin -fno-stack-protector
> +ccflags-y += -DDISABLE_BRANCH_PROFILING
> +
> +VDSO_LDFLAGS := -Bsymbolic
> +
> +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
> +KBUILD_CFLAGS += $(DISABLE_LTO)
> +KASAN_SANITIZE := n
> +UBSAN_SANITIZE := n
> +OBJECT_FILES_NON_STANDARD := y
> +KCOV_INSTRUMENT := n
> +
> +ifeq ($(c-gettimeofday-y),)
> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
> +else
> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
> +endif
> +
> # Disable gcov profiling for VDSO code
> GCOV_PROFILE := n
>
> @@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
> # Link rule for the .so file, .lds has to be first
> $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
> $(call if_changed,ld)
> + $(call if_changed,vdso_check)
>
> # Strip rule for the .so file
> $(obj)/%.so: OBJCOPYFLAGS := -S
> @@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
> include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
> $(call if_changed,vdsosym)
>
> -# Assembly rules for the .S files
> -$(obj-vdso): %.o: %.S FORCE
> - $(call if_changed_dep,vdsoas)
> -
> # Actual build commands
> -quiet_cmd_vdsoas = VDSOA $@
> - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
> +quiet_cmd_vdsocc = VDSOCC $@
> + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
>
> # Install commands for the unstripped file
> quiet_cmd_vdso_install = INSTALL $@
> diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
> deleted file mode 100644
> index 856fee6d3512..000000000000
> --- a/arch/arm64/kernel/vdso/gettimeofday.S
> +++ /dev/null
> @@ -1,334 +0,0 @@
> -/*
> - * Userspace implementations of gettimeofday() and friends.
> - *
> - * Copyright (C) 2012 ARM Limited
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> - * GNU General Public License for more details.
> - *
> - * You should have received a copy of the GNU General Public License
> - * along with this program. If not, see <http://www.gnu.org/licenses/>.
> - *
> - * Author: Will Deacon <[email protected]>
> - */
> -
> -#include <linux/linkage.h>
> -#include <asm/asm-offsets.h>
> -#include <asm/unistd.h>
> -
> -#define NSEC_PER_SEC_LO16 0xca00
> -#define NSEC_PER_SEC_HI16 0x3b9a
> -
> -vdso_data .req x6
> -seqcnt .req w7
> -w_tmp .req w8
> -x_tmp .req x8
> -
> -/*
> - * Conventions for macro arguments:
> - * - An argument is write-only if its name starts with "res".
> - * - All other arguments are read-only, unless otherwise specified.
> - */
> -
> - .macro seqcnt_acquire
> -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
> - tbnz seqcnt, #0, 9999b
> - dmb ishld
> - .endm
> -
> - .macro seqcnt_check fail
> - dmb ishld
> - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
> - cmp w_tmp, seqcnt
> - b.ne \fail
> - .endm
> -
> - .macro syscall_check fail
> - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
> - cbnz w_tmp, \fail
> - .endm
> -
> - .macro get_nsec_per_sec res
> - mov \res, #NSEC_PER_SEC_LO16
> - movk \res, #NSEC_PER_SEC_HI16, lsl #16
> - .endm
> -
> - /*
> - * Returns the clock delta, in nanoseconds left-shifted by the clock
> - * shift.
> - */
> - .macro get_clock_shifted_nsec res, cycle_last, mult
> - /* Read the virtual counter. */
> - isb
> - mrs x_tmp, cntvct_el0
> - /* Calculate cycle delta and convert to ns. */
> - sub \res, x_tmp, \cycle_last
> - /* We can only guarantee 56 bits of precision. */
> - movn x_tmp, #0xff00, lsl #48
> - and \res, x_tmp, \res
> - mul \res, \res, \mult
> - /*
> - * Fake address dependency from the value computed from the counter
> - * register to subsequent data page accesses so that the sequence
> - * locking also orders the read of the counter.
> - */
> - and x_tmp, \res, xzr
> - add vdso_data, vdso_data, x_tmp
> - .endm
> -
> - /*
> - * Returns in res_{sec,nsec} the REALTIME timespec, based on the
> - * "wall time" (xtime) and the clock_mono delta.
> - */
> - .macro get_ts_realtime res_sec, res_nsec, \
> - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
> - add \res_nsec, \clock_nsec, \xtime_nsec
> - udiv x_tmp, \res_nsec, \nsec_to_sec
> - add \res_sec, \xtime_sec, x_tmp
> - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
> - .endm
> -
> - /*
> - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
> - * used for CLOCK_MONOTONIC_RAW.
> - */
> - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
> - udiv \res_sec, \clock_nsec, \nsec_to_sec
> - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
> - .endm
> -
> - /* sec and nsec are modified in place. */
> - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
> - /* Add timespec. */
> - add \sec, \sec, \ts_sec
> - add \nsec, \nsec, \ts_nsec
> -
> - /* Normalise the new timespec. */
> - cmp \nsec, \nsec_to_sec
> - b.lt 9999f
> - sub \nsec, \nsec, \nsec_to_sec
> - add \sec, \sec, #1
> -9999:
> - cmp \nsec, #0
> - b.ge 9998f
> - add \nsec, \nsec, \nsec_to_sec
> - sub \sec, \sec, #1
> -9998:
> - .endm
> -
> - .macro clock_gettime_return, shift=0
> - .if \shift == 1
> - lsr x11, x11, x12
> - .endif
> - stp x10, x11, [x1, #TSPEC_TV_SEC]
> - mov x0, xzr
> - ret
> - .endm
> -
> - .macro jump_slot jumptable, index, label
> - .if (. - \jumptable) != 4 * (\index)
> - .error "Jump slot index mismatch"
> - .endif
> - b \label
> - .endm
> -
> - .text
> -
> -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
> -ENTRY(__kernel_gettimeofday)
> - .cfi_startproc
> - adr vdso_data, _vdso_data
> - /* If tv is NULL, skip to the timezone code. */
> - cbz x0, 2f
> -
> - /* Compute the time of day. */
> -1: seqcnt_acquire
> - syscall_check fail=4f
> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
> - /* w11 = cs_mono_mult, w12 = cs_shift */
> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
> -
> - get_nsec_per_sec res=x9
> - lsl x9, x9, x12
> -
> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
> - seqcnt_check fail=1b
> - get_ts_realtime res_sec=x10, res_nsec=x11, \
> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
> -
> - /* Convert ns to us. */
> - mov x13, #1000
> - lsl x13, x13, x12
> - udiv x11, x11, x13
> - stp x10, x11, [x0, #TVAL_TV_SEC]
> -2:
> - /* If tz is NULL, return 0. */
> - cbz x1, 3f
> - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
> - stp w4, w5, [x1, #TZ_MINWEST]
> -3:
> - mov x0, xzr
> - ret
> -4:
> - /* Syscall fallback. */
> - mov x8, #__NR_gettimeofday
> - svc #0
> - ret
> - .cfi_endproc
> -ENDPROC(__kernel_gettimeofday)
> -
> -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
> -
> -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
> -ENTRY(__kernel_clock_gettime)
> - .cfi_startproc
> - cmp w0, #JUMPSLOT_MAX
> - b.hi syscall
> - adr vdso_data, _vdso_data
> - adr x_tmp, jumptable
> - add x_tmp, x_tmp, w0, uxtw #2
> - br x_tmp
> -
> - ALIGN
> -jumptable:
> - jump_slot jumptable, CLOCK_REALTIME, realtime
> - jump_slot jumptable, CLOCK_MONOTONIC, monotonic
> - b syscall
> - b syscall
> - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
> - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
> - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
> -
> - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
> - .error "Wrong jumptable size"
> - .endif
> -
> - ALIGN
> -realtime:
> - seqcnt_acquire
> - syscall_check fail=syscall
> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
> - /* w11 = cs_mono_mult, w12 = cs_shift */
> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
> -
> - /* All computations are done with left-shifted nsecs. */
> - get_nsec_per_sec res=x9
> - lsl x9, x9, x12
> -
> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
> - seqcnt_check fail=realtime
> - get_ts_realtime res_sec=x10, res_nsec=x11, \
> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
> - clock_gettime_return, shift=1
> -
> - ALIGN
> -monotonic:
> - seqcnt_acquire
> - syscall_check fail=syscall
> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
> - /* w11 = cs_mono_mult, w12 = cs_shift */
> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
> - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
> -
> - /* All computations are done with left-shifted nsecs. */
> - lsl x4, x4, x12
> - get_nsec_per_sec res=x9
> - lsl x9, x9, x12
> -
> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
> - seqcnt_check fail=monotonic
> - get_ts_realtime res_sec=x10, res_nsec=x11, \
> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
> -
> - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
> - clock_gettime_return, shift=1
> -
> - ALIGN
> -monotonic_raw:
> - seqcnt_acquire
> - syscall_check fail=syscall
> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
> - /* w11 = cs_raw_mult, w12 = cs_shift */
> - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
> - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
> -
> - /* All computations are done with left-shifted nsecs. */
> - get_nsec_per_sec res=x9
> - lsl x9, x9, x12
> -
> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
> - seqcnt_check fail=monotonic_raw
> - get_ts_clock_raw res_sec=x10, res_nsec=x11, \
> - clock_nsec=x15, nsec_to_sec=x9
> -
> - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
> - clock_gettime_return, shift=1
> -
> - ALIGN
> -realtime_coarse:
> - seqcnt_acquire
> - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
> - seqcnt_check fail=realtime_coarse
> - clock_gettime_return
> -
> - ALIGN
> -monotonic_coarse:
> - seqcnt_acquire
> - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
> - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
> - seqcnt_check fail=monotonic_coarse
> -
> - /* Computations are done in (non-shifted) nsecs. */
> - get_nsec_per_sec res=x9
> - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
> - clock_gettime_return
> -
> - ALIGN
> -syscall: /* Syscall fallback. */
> - mov x8, #__NR_clock_gettime
> - svc #0
> - ret
> - .cfi_endproc
> -ENDPROC(__kernel_clock_gettime)
> -
> -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
> -ENTRY(__kernel_clock_getres)
> - .cfi_startproc
> - cmp w0, #CLOCK_REALTIME
> - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
> - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
> - b.ne 1f
> -
> - adr vdso_data, _vdso_data
> - ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
> - b 2f
> -1:
> - cmp w0, #CLOCK_REALTIME_COARSE
> - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
> - b.ne 4f
> - ldr x2, 5f
> -2:
> - cbz x1, 3f
> - stp xzr, x2, [x1]
> -
> -3: /* res == NULL. */
> - mov w0, wzr
> - ret
> -
> -4: /* Syscall fallback. */
> - mov x8, #__NR_clock_getres
> - svc #0
> - ret
> -5:
> - .quad CLOCK_COARSE_RES
> - .cfi_endproc
> -ENDPROC(__kernel_clock_getres)
> diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
> new file mode 100644
> index 000000000000..3c58f19dbdf4
> --- /dev/null
> +++ b/arch/arm64/kernel/vdso/vgettimeofday.c
> @@ -0,0 +1,28 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * ARM64 userspace implementations of gettimeofday() and similar.
> + *
> + * Copyright (C) 2018 ARM Limited
> + *
> + */
> +#include <linux/time.h>
> +#include <linux/types.h>
> +
> +int __kernel_clock_gettime(clockid_t clock,
> + struct __kernel_timespec *ts)
> +{
> + return __cvdso_clock_gettime(clock, ts);
> +}
> +
> +int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
> + struct timezone *tz)
> +{
> + return __cvdso_gettimeofday(tv, tz);
> +}
> +
> +int __kernel_clock_getres(clockid_t clock_id,
> + struct __kernel_timespec *res)
> +{
> + return __cvdso_clock_getres(clock_id, res);
> +}
> +

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland

2019-06-28 14:31:49

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Marek,

On 6/28/19 2:09 PM, Marek Szyprowski wrote:
> Dear All,
>
> On 2019-06-21 11:52, Vincenzo Frascino wrote:
>> To take advantage of the commonly defined vdso interface for
>> gettimeofday the architectural code requires an adaptation.
>>
>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>>
>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
>> and CLOCK_TAI.
>>
>> Cc: Catalin Marinas <[email protected]>
>> Cc: Will Deacon <[email protected]>
>> Signed-off-by: Vincenzo Frascino <[email protected]>
>> Tested-by: Shijith Thotton <[email protected]>
>> Tested-by: Andre Przywara <[email protected]>
>> Signed-off-by: Catalin Marinas <[email protected]>
>
> This patch causes serious regression on Samsung Exynos5433 SoC based
> TM2(e) boards. The time in userspace is always set to begin of the epoch:
>
> # date 062813152019
> Fri Jun 28 13:15:00 UTC 2019
> # date
> Thu Jan  1 00:00:00 UTC 1970
> # date
> Thu Jan  1 00:00:00 UTC 1970
>
> I've noticed that since the patch landed in Linux next-20190625 and
> bisect indeed pointed to this patch.
>

Thank you for reporting this. It seems that the linux-next tree you tested is missing
some fixes for arm64.

Could you please try the tree below?

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso

Let us know if the functionality is restored. Otherwise the issue will require
further investigation.

>> ---
>> arch/arm64/Kconfig | 2 +
>> arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++
>> arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++
>> arch/arm64/include/asm/vdso_datapage.h | 48 ---
>> arch/arm64/kernel/asm-offsets.c | 33 +-
>> arch/arm64/kernel/vdso.c | 51 +---
>> arch/arm64/kernel/vdso/Makefile | 34 ++-
>> arch/arm64/kernel/vdso/gettimeofday.S | 334 ---------------------
>> arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++
>> 9 files changed, 223 insertions(+), 446 deletions(-)
>> create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h
>> create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h
>> delete mode 100644 arch/arm64/include/asm/vdso_datapage.h
>> delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S
>> create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 697ea0510729..952c9f8cf3b8 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -107,6 +107,7 @@ config ARM64
>> select GENERIC_STRNCPY_FROM_USER
>> select GENERIC_STRNLEN_USER
>> select GENERIC_TIME_VSYSCALL
>> + select GENERIC_GETTIMEOFDAY
>> select HANDLE_DOMAIN_IRQ
>> select HARDIRQS_SW_RESEND
>> select HAVE_PCI
>> @@ -160,6 +161,7 @@ config ARM64
>> select HAVE_SYSCALL_TRACEPOINTS
>> select HAVE_KPROBES
>> select HAVE_KRETPROBES
>> + select HAVE_GENERIC_VDSO
>> select IOMMU_DMA if IOMMU_SUPPORT
>> select IRQ_DOMAIN
>> select IRQ_FORCED_THREADING
>> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
>> new file mode 100644
>> index 000000000000..bc3cb6738051
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h
>> @@ -0,0 +1,86 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2018 ARM Limited
>> + */
>> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
>> +#define __ASM_VDSO_GETTIMEOFDAY_H
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <asm/unistd.h>
>> +#include <uapi/linux/time.h>
>> +
>> +#define VDSO_HAS_CLOCK_GETRES 1
>> +
>> +static __always_inline int gettimeofday_fallback(
>> + struct __kernel_old_timeval *_tv,
>> + struct timezone *_tz)
>> +{
>> + register struct timezone *tz asm("x1") = _tz;
>> + register struct __kernel_old_timeval *tv asm("x0") = _tv;
>> + register long ret asm ("x0");
>> + register long nr asm("x8") = __NR_gettimeofday;
>> +
>> + asm volatile(
>> + " svc #0\n"
>> + : "=r" (ret)
>> + : "r" (tv), "r" (tz), "r" (nr)
>> + : "memory");
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline long clock_gettime_fallback(
>> + clockid_t _clkid,
>> + struct __kernel_timespec *_ts)
>> +{
>> + register struct __kernel_timespec *ts asm("x1") = _ts;
>> + register clockid_t clkid asm("x0") = _clkid;
>> + register long ret asm ("x0");
>> + register long nr asm("x8") = __NR_clock_gettime;
>> +
>> + asm volatile(
>> + " svc #0\n"
>> + : "=r" (ret)
>> + : "r" (clkid), "r" (ts), "r" (nr)
>> + : "memory");
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline int clock_getres_fallback(
>> + clockid_t _clkid,
>> + struct __kernel_timespec *_ts)
>> +{
>> + register struct __kernel_timespec *ts asm("x1") = _ts;
>> + register clockid_t clkid asm("x0") = _clkid;
>> + register long ret asm ("x0");
>> + register long nr asm("x8") = __NR_clock_getres;
>> +
>> + asm volatile(
>> + " svc #0\n"
>> + : "=r" (ret)
>> + : "r" (clkid), "r" (ts), "r" (nr)
>> + : "memory");
>> +
>> + return ret;
>> +}
>> +
>> +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
>> +{
>> + u64 res;
>> +
>> + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
>> +
>> + return res;
>> +}
>> +
>> +static __always_inline
>> +const struct vdso_data *__arch_get_vdso_data(void)
>> +{
>> + return _vdso_data;
>> +}
>> +
>> +#endif /* !__ASSEMBLY__ */
>> +
>> +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
>> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
>> new file mode 100644
>> index 000000000000..0c731bfc7c8c
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
>> @@ -0,0 +1,53 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __ASM_VDSO_VSYSCALL_H
>> +#define __ASM_VDSO_VSYSCALL_H
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <linux/timekeeper_internal.h>
>> +#include <vdso/datapage.h>
>> +
>> +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
>> +
>> +extern struct vdso_data *vdso_data;
>> +
>> +/*
>> + * Update the vDSO data page to keep in sync with kernel timekeeping.
>> + */
>> +static __always_inline
>> +struct vdso_data *__arm64_get_k_vdso_data(void)
>> +{
>> + return vdso_data;
>> +}
>> +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
>> +
>> +static __always_inline
>> +int __arm64_get_clock_mode(struct timekeeper *tk)
>> +{
>> + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
>> +
>> + return use_syscall;
>> +}
>> +#define __arch_get_clock_mode __arm64_get_clock_mode
>> +
>> +static __always_inline
>> +int __arm64_use_vsyscall(struct vdso_data *vdata)
>> +{
>> + return !vdata[CS_HRES_COARSE].clock_mode;
>> +}
>> +#define __arch_use_vsyscall __arm64_use_vsyscall
>> +
>> +static __always_inline
>> +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
>> +{
>> + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
>> + vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
>> +}
>> +#define __arch_update_vsyscall __arm64_update_vsyscall
>> +
>> +/* The asm-generic header needs to be included after the definitions above */
>> +#include <asm-generic/vdso/vsyscall.h>
>> +
>> +#endif /* !__ASSEMBLY__ */
>> +
>> +#endif /* __ASM_VDSO_VSYSCALL_H */
>> diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
>> deleted file mode 100644
>> index f89263c8e11a..000000000000
>> --- a/arch/arm64/include/asm/vdso_datapage.h
>> +++ /dev/null
>> @@ -1,48 +0,0 @@
>> -/*
>> - * Copyright (C) 2012 ARM Limited
>> - *
>> - * This program is free software; you can redistribute it and/or modify
>> - * it under the terms of the GNU General Public License version 2 as
>> - * published by the Free Software Foundation.
>> - *
>> - * This program is distributed in the hope that it will be useful,
>> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> - * GNU General Public License for more details.
>> - *
>> - * You should have received a copy of the GNU General Public License
>> - * along with this program. If not, see <http://www.gnu.org/licenses/>.
>> - */
>> -#ifndef __ASM_VDSO_DATAPAGE_H
>> -#define __ASM_VDSO_DATAPAGE_H
>> -
>> -#ifdef __KERNEL__
>> -
>> -#ifndef __ASSEMBLY__
>> -
>> -struct vdso_data {
>> - __u64 cs_cycle_last; /* Timebase at clocksource init */
>> - __u64 raw_time_sec; /* Raw time */
>> - __u64 raw_time_nsec;
>> - __u64 xtime_clock_sec; /* Kernel time */
>> - __u64 xtime_clock_nsec;
>> - __u64 xtime_coarse_sec; /* Coarse time */
>> - __u64 xtime_coarse_nsec;
>> - __u64 wtm_clock_sec; /* Wall to monotonic time */
>> - __u64 wtm_clock_nsec;
>> - __u32 tb_seq_count; /* Timebase sequence counter */
>> - /* cs_* members must be adjacent and in this order (ldp accesses) */
>> - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
>> - __u32 cs_shift; /* Clocksource shift (mono = raw) */
>> - __u32 cs_raw_mult; /* Raw clocksource multiplier */
>> - __u32 tz_minuteswest; /* Whacky timezone stuff */
>> - __u32 tz_dsttime;
>> - __u32 use_syscall;
>> - __u32 hrtimer_res;
>> -};
>> -
>> -#endif /* !__ASSEMBLY__ */
>> -
>> -#endif /* __KERNEL__ */
>> -
>> -#endif /* __ASM_VDSO_DATAPAGE_H */
>> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
>> index 947e39896e28..9e4b7ccbab2f 100644
>> --- a/arch/arm64/kernel/asm-offsets.c
>> +++ b/arch/arm64/kernel/asm-offsets.c
>> @@ -25,13 +25,13 @@
>> #include <linux/kvm_host.h>
>> #include <linux/preempt.h>
>> #include <linux/suspend.h>
>> +#include <vdso/datapage.h>
>> #include <asm/cpufeature.h>
>> #include <asm/fixmap.h>
>> #include <asm/thread_info.h>
>> #include <asm/memory.h>
>> #include <asm/smp_plat.h>
>> #include <asm/suspend.h>
>> -#include <asm/vdso_datapage.h>
>> #include <linux/kbuild.h>
>> #include <linux/arm-smccc.h>
>>
>> @@ -100,17 +100,28 @@ int main(void)
>> DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
>> DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
>> BLANK();
>> - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
>> - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
>> - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
>> - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
>> - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
>> - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
>> - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
>> - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
>> - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
>> + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq));
>> + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode));
>> + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last));
>> + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask));
>> + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult));
>> + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift));
>> + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec));
>> + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec));
>> + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec));
>> + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec));
>> + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec));
>> + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec));
>> + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec));
>> + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec));
>> + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec));
>> + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec));
>> + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec));
>> + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec));
>> + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec));
>> + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec));
>> DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
>> - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
>> + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
>> BLANK();
>> DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
>> DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
>> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
>> index 8074cbd3a3a8..23c38303a52a 100644
>> --- a/arch/arm64/kernel/vdso.c
>> +++ b/arch/arm64/kernel/vdso.c
>> @@ -31,11 +31,13 @@
>> #include <linux/slab.h>
>> #include <linux/timekeeper_internal.h>
>> #include <linux/vmalloc.h>
>> +#include <vdso/datapage.h>
>> +#include <vdso/helpers.h>
>> +#include <vdso/vsyscall.h>
>>
>> #include <asm/cacheflush.h>
>> #include <asm/signal32.h>
>> #include <asm/vdso.h>
>> -#include <asm/vdso_datapage.h>
>>
>> extern char vdso_start[], vdso_end[];
>> static unsigned long vdso_pages __ro_after_init;
>> @@ -44,10 +46,10 @@ static unsigned long vdso_pages __ro_after_init;
>> * The vDSO data page.
>> */
>> static union {
>> - struct vdso_data data;
>> + struct vdso_data data[CS_BASES];
>> u8 page[PAGE_SIZE];
>> } vdso_data_store __page_aligned_data;
>> -struct vdso_data *vdso_data = &vdso_data_store.data;
>> +struct vdso_data *vdso_data = vdso_data_store.data;
>>
>> #ifdef CONFIG_COMPAT
>> /*
>> @@ -280,46 +282,3 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
>> up_write(&mm->mmap_sem);
>> return PTR_ERR(ret);
>> }
>> -
>> -/*
>> - * Update the vDSO data page to keep in sync with kernel timekeeping.
>> - */
>> -void update_vsyscall(struct timekeeper *tk)
>> -{
>> - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
>> -
>> - ++vdso_data->tb_seq_count;
>> - smp_wmb();
>> -
>> - vdso_data->use_syscall = use_syscall;
>> - vdso_data->xtime_coarse_sec = tk->xtime_sec;
>> - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
>> - tk->tkr_mono.shift;
>> - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
>> - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
>> -
>> - /* Read without the seqlock held by clock_getres() */
>> - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
>> -
>> - if (!use_syscall) {
>> - /* tkr_mono.cycle_last == tkr_raw.cycle_last */
>> - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
>> - vdso_data->raw_time_sec = tk->raw_sec;
>> - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
>> - vdso_data->xtime_clock_sec = tk->xtime_sec;
>> - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
>> - vdso_data->cs_mono_mult = tk->tkr_mono.mult;
>> - vdso_data->cs_raw_mult = tk->tkr_raw.mult;
>> - /* tkr_mono.shift == tkr_raw.shift */
>> - vdso_data->cs_shift = tk->tkr_mono.shift;
>> - }
>> -
>> - smp_wmb();
>> - ++vdso_data->tb_seq_count;
>> -}
>> -
>> -void update_vsyscall_tz(void)
>> -{
>> - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
>> - vdso_data->tz_dsttime = sys_tz.tz_dsttime;
>> -}
>> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
>> index fa230ff09aa1..3acfc813e966 100644
>> --- a/arch/arm64/kernel/vdso/Makefile
>> +++ b/arch/arm64/kernel/vdso/Makefile
>> @@ -6,7 +6,12 @@
>> # Heavily based on the vDSO Makefiles for other archs.
>> #
>>
>> -obj-vdso := gettimeofday.o note.o sigreturn.o
>> +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
>> +# the inclusion of generic Makefile.
>> +ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
>> +include $(srctree)/lib/vdso/Makefile
>> +
>> +obj-vdso := vgettimeofday.o note.o sigreturn.o
>>
>> # Build rules
>> targets := $(obj-vdso) vdso.so vdso.so.dbg
>> @@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
>> ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
>> --build-id -n -T
>>
>> +ccflags-y := -fno-common -fno-builtin -fno-stack-protector
>> +ccflags-y += -DDISABLE_BRANCH_PROFILING
>> +
>> +VDSO_LDFLAGS := -Bsymbolic
>> +
>> +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
>> +KBUILD_CFLAGS += $(DISABLE_LTO)
>> +KASAN_SANITIZE := n
>> +UBSAN_SANITIZE := n
>> +OBJECT_FILES_NON_STANDARD := y
>> +KCOV_INSTRUMENT := n
>> +
>> +ifeq ($(c-gettimeofday-y),)
>> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
>> +else
>> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
>> +endif
>> +
>> # Disable gcov profiling for VDSO code
>> GCOV_PROFILE := n
>>
>> @@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
>> # Link rule for the .so file, .lds has to be first
>> $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
>> $(call if_changed,ld)
>> + $(call if_changed,vdso_check)
>>
>> # Strip rule for the .so file
>> $(obj)/%.so: OBJCOPYFLAGS := -S
>> @@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
>> include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
>> $(call if_changed,vdsosym)
>>
>> -# Assembly rules for the .S files
>> -$(obj-vdso): %.o: %.S FORCE
>> - $(call if_changed_dep,vdsoas)
>> -
>> # Actual build commands
>> -quiet_cmd_vdsoas = VDSOA $@
>> - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
>> +quiet_cmd_vdsocc = VDSOCC $@
>> + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
>>
>> # Install commands for the unstripped file
>> quiet_cmd_vdso_install = INSTALL $@
>> diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
>> deleted file mode 100644
>> index 856fee6d3512..000000000000
>> --- a/arch/arm64/kernel/vdso/gettimeofday.S
>> +++ /dev/null
>> @@ -1,334 +0,0 @@
>> -/*
>> - * Userspace implementations of gettimeofday() and friends.
>> - *
>> - * Copyright (C) 2012 ARM Limited
>> - *
>> - * This program is free software; you can redistribute it and/or modify
>> - * it under the terms of the GNU General Public License version 2 as
>> - * published by the Free Software Foundation.
>> - *
>> - * This program is distributed in the hope that it will be useful,
>> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> - * GNU General Public License for more details.
>> - *
>> - * You should have received a copy of the GNU General Public License
>> - * along with this program. If not, see <http://www.gnu.org/licenses/>.
>> - *
>> - * Author: Will Deacon <[email protected]>
>> - */
>> -
>> -#include <linux/linkage.h>
>> -#include <asm/asm-offsets.h>
>> -#include <asm/unistd.h>
>> -
>> -#define NSEC_PER_SEC_LO16 0xca00
>> -#define NSEC_PER_SEC_HI16 0x3b9a
>> -
>> -vdso_data .req x6
>> -seqcnt .req w7
>> -w_tmp .req w8
>> -x_tmp .req x8
>> -
>> -/*
>> - * Conventions for macro arguments:
>> - * - An argument is write-only if its name starts with "res".
>> - * - All other arguments are read-only, unless otherwise specified.
>> - */
>> -
>> - .macro seqcnt_acquire
>> -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
>> - tbnz seqcnt, #0, 9999b
>> - dmb ishld
>> - .endm
>> -
>> - .macro seqcnt_check fail
>> - dmb ishld
>> - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
>> - cmp w_tmp, seqcnt
>> - b.ne \fail
>> - .endm
>> -
>> - .macro syscall_check fail
>> - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
>> - cbnz w_tmp, \fail
>> - .endm
>> -
>> - .macro get_nsec_per_sec res
>> - mov \res, #NSEC_PER_SEC_LO16
>> - movk \res, #NSEC_PER_SEC_HI16, lsl #16
>> - .endm
>> -
>> - /*
>> - * Returns the clock delta, in nanoseconds left-shifted by the clock
>> - * shift.
>> - */
>> - .macro get_clock_shifted_nsec res, cycle_last, mult
>> - /* Read the virtual counter. */
>> - isb
>> - mrs x_tmp, cntvct_el0
>> - /* Calculate cycle delta and convert to ns. */
>> - sub \res, x_tmp, \cycle_last
>> - /* We can only guarantee 56 bits of precision. */
>> - movn x_tmp, #0xff00, lsl #48
>> - and \res, x_tmp, \res
>> - mul \res, \res, \mult
>> - /*
>> - * Fake address dependency from the value computed from the counter
>> - * register to subsequent data page accesses so that the sequence
>> - * locking also orders the read of the counter.
>> - */
>> - and x_tmp, \res, xzr
>> - add vdso_data, vdso_data, x_tmp
>> - .endm
>> -
>> - /*
>> - * Returns in res_{sec,nsec} the REALTIME timespec, based on the
>> - * "wall time" (xtime) and the clock_mono delta.
>> - */
>> - .macro get_ts_realtime res_sec, res_nsec, \
>> - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
>> - add \res_nsec, \clock_nsec, \xtime_nsec
>> - udiv x_tmp, \res_nsec, \nsec_to_sec
>> - add \res_sec, \xtime_sec, x_tmp
>> - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
>> - .endm
>> -
>> - /*
>> - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
>> - * used for CLOCK_MONOTONIC_RAW.
>> - */
>> - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
>> - udiv \res_sec, \clock_nsec, \nsec_to_sec
>> - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
>> - .endm
>> -
>> - /* sec and nsec are modified in place. */
>> - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
>> - /* Add timespec. */
>> - add \sec, \sec, \ts_sec
>> - add \nsec, \nsec, \ts_nsec
>> -
>> - /* Normalise the new timespec. */
>> - cmp \nsec, \nsec_to_sec
>> - b.lt 9999f
>> - sub \nsec, \nsec, \nsec_to_sec
>> - add \sec, \sec, #1
>> -9999:
>> - cmp \nsec, #0
>> - b.ge 9998f
>> - add \nsec, \nsec, \nsec_to_sec
>> - sub \sec, \sec, #1
>> -9998:
>> - .endm
>> -
>> - .macro clock_gettime_return, shift=0
>> - .if \shift == 1
>> - lsr x11, x11, x12
>> - .endif
>> - stp x10, x11, [x1, #TSPEC_TV_SEC]
>> - mov x0, xzr
>> - ret
>> - .endm
>> -
>> - .macro jump_slot jumptable, index, label
>> - .if (. - \jumptable) != 4 * (\index)
>> - .error "Jump slot index mismatch"
>> - .endif
>> - b \label
>> - .endm
>> -
>> - .text
>> -
>> -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
>> -ENTRY(__kernel_gettimeofday)
>> - .cfi_startproc
>> - adr vdso_data, _vdso_data
>> - /* If tv is NULL, skip to the timezone code. */
>> - cbz x0, 2f
>> -
>> - /* Compute the time of day. */
>> -1: seqcnt_acquire
>> - syscall_check fail=4f
>> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
>> - /* w11 = cs_mono_mult, w12 = cs_shift */
>> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
>> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
>> -
>> - get_nsec_per_sec res=x9
>> - lsl x9, x9, x12
>> -
>> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
>> - seqcnt_check fail=1b
>> - get_ts_realtime res_sec=x10, res_nsec=x11, \
>> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
>> -
>> - /* Convert ns to us. */
>> - mov x13, #1000
>> - lsl x13, x13, x12
>> - udiv x11, x11, x13
>> - stp x10, x11, [x0, #TVAL_TV_SEC]
>> -2:
>> - /* If tz is NULL, return 0. */
>> - cbz x1, 3f
>> - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
>> - stp w4, w5, [x1, #TZ_MINWEST]
>> -3:
>> - mov x0, xzr
>> - ret
>> -4:
>> - /* Syscall fallback. */
>> - mov x8, #__NR_gettimeofday
>> - svc #0
>> - ret
>> - .cfi_endproc
>> -ENDPROC(__kernel_gettimeofday)
>> -
>> -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
>> -
>> -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
>> -ENTRY(__kernel_clock_gettime)
>> - .cfi_startproc
>> - cmp w0, #JUMPSLOT_MAX
>> - b.hi syscall
>> - adr vdso_data, _vdso_data
>> - adr x_tmp, jumptable
>> - add x_tmp, x_tmp, w0, uxtw #2
>> - br x_tmp
>> -
>> - ALIGN
>> -jumptable:
>> - jump_slot jumptable, CLOCK_REALTIME, realtime
>> - jump_slot jumptable, CLOCK_MONOTONIC, monotonic
>> - b syscall
>> - b syscall
>> - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
>> - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
>> - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
>> -
>> - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
>> - .error "Wrong jumptable size"
>> - .endif
>> -
>> - ALIGN
>> -realtime:
>> - seqcnt_acquire
>> - syscall_check fail=syscall
>> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
>> - /* w11 = cs_mono_mult, w12 = cs_shift */
>> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
>> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
>> -
>> - /* All computations are done with left-shifted nsecs. */
>> - get_nsec_per_sec res=x9
>> - lsl x9, x9, x12
>> -
>> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
>> - seqcnt_check fail=realtime
>> - get_ts_realtime res_sec=x10, res_nsec=x11, \
>> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
>> - clock_gettime_return, shift=1
>> -
>> - ALIGN
>> -monotonic:
>> - seqcnt_acquire
>> - syscall_check fail=syscall
>> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
>> - /* w11 = cs_mono_mult, w12 = cs_shift */
>> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
>> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
>> - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
>> -
>> - /* All computations are done with left-shifted nsecs. */
>> - lsl x4, x4, x12
>> - get_nsec_per_sec res=x9
>> - lsl x9, x9, x12
>> -
>> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
>> - seqcnt_check fail=monotonic
>> - get_ts_realtime res_sec=x10, res_nsec=x11, \
>> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
>> -
>> - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
>> - clock_gettime_return, shift=1
>> -
>> - ALIGN
>> -monotonic_raw:
>> - seqcnt_acquire
>> - syscall_check fail=syscall
>> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
>> - /* w11 = cs_raw_mult, w12 = cs_shift */
>> - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
>> - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
>> -
>> - /* All computations are done with left-shifted nsecs. */
>> - get_nsec_per_sec res=x9
>> - lsl x9, x9, x12
>> -
>> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
>> - seqcnt_check fail=monotonic_raw
>> - get_ts_clock_raw res_sec=x10, res_nsec=x11, \
>> - clock_nsec=x15, nsec_to_sec=x9
>> -
>> - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
>> - clock_gettime_return, shift=1
>> -
>> - ALIGN
>> -realtime_coarse:
>> - seqcnt_acquire
>> - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
>> - seqcnt_check fail=realtime_coarse
>> - clock_gettime_return
>> -
>> - ALIGN
>> -monotonic_coarse:
>> - seqcnt_acquire
>> - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
>> - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
>> - seqcnt_check fail=monotonic_coarse
>> -
>> - /* Computations are done in (non-shifted) nsecs. */
>> - get_nsec_per_sec res=x9
>> - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
>> - clock_gettime_return
>> -
>> - ALIGN
>> -syscall: /* Syscall fallback. */
>> - mov x8, #__NR_clock_gettime
>> - svc #0
>> - ret
>> - .cfi_endproc
>> -ENDPROC(__kernel_clock_gettime)
>> -
>> -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
>> -ENTRY(__kernel_clock_getres)
>> - .cfi_startproc
>> - cmp w0, #CLOCK_REALTIME
>> - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
>> - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
>> - b.ne 1f
>> -
>> - adr vdso_data, _vdso_data
>> - ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
>> - b 2f
>> -1:
>> - cmp w0, #CLOCK_REALTIME_COARSE
>> - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
>> - b.ne 4f
>> - ldr x2, 5f
>> -2:
>> - cbz x1, 3f
>> - stp xzr, x2, [x1]
>> -
>> -3: /* res == NULL. */
>> - mov w0, wzr
>> - ret
>> -
>> -4: /* Syscall fallback. */
>> - mov x8, #__NR_clock_getres
>> - svc #0
>> - ret
>> -5:
>> - .quad CLOCK_COARSE_RES
>> - .cfi_endproc
>> -ENDPROC(__kernel_clock_getres)
>> diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
>> new file mode 100644
>> index 000000000000..3c58f19dbdf4
>> --- /dev/null
>> +++ b/arch/arm64/kernel/vdso/vgettimeofday.c
>> @@ -0,0 +1,28 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * ARM64 userspace implementations of gettimeofday() and similar.
>> + *
>> + * Copyright (C) 2018 ARM Limited
>> + *
>> + */
>> +#include <linux/time.h>
>> +#include <linux/types.h>
>> +
>> +int __kernel_clock_gettime(clockid_t clock,
>> + struct __kernel_timespec *ts)
>> +{
>> + return __cvdso_clock_gettime(clock, ts);
>> +}
>> +
>> +int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
>> + struct timezone *tz)
>> +{
>> + return __cvdso_gettimeofday(tv, tz);
>> +}
>> +
>> +int __kernel_clock_getres(clockid_t clock_id,
>> + struct __kernel_timespec *res)
>> +{
>> + return __cvdso_clock_getres(clock_id, res);
>> +}
>> +
>
> Best regards
>

--
Regards,
Vincenzo

2019-06-28 16:50:43

by Sylwester Nawrocki

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Vincenzo,

On 6/28/19 16:32, Vincenzo Frascino wrote:
> On 6/28/19 2:09 PM, Marek Szyprowski wrote:
>> On 2019-06-21 11:52, Vincenzo Frascino wrote:
>>> To take advantage of the commonly defined vdso interface for
>>> gettimeofday the architectural code requires an adaptation.
>>>
>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>>>
>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
>>> and CLOCK_TAI.
>>>
>>> Cc: Catalin Marinas <[email protected]>
>>> Cc: Will Deacon <[email protected]>
>>> Signed-off-by: Vincenzo Frascino <[email protected]>
>>> Tested-by: Shijith Thotton <[email protected]>
>>> Tested-by: Andre Przywara <[email protected]>
>>> Signed-off-by: Catalin Marinas <[email protected]>
>>
>> This patch causes serious regression on Samsung Exynos5433 SoC based
>> TM2(e) boards. The time in userspace is always set to begin of the epoch:
>>
>> # date 062813152019
>> Fri Jun 28 13:15:00 UTC 2019
>> # date
>> Thu Jan  1 00:00:00 UTC 1970
>> # date
>> Thu Jan  1 00:00:00 UTC 1970
>>
>> I've noticed that since the patch landed in Linux next-20190625 and
>> bisect indeed pointed to this patch.
>>
> Thank you for reporting this, seems that the next that you posted is missing
> some fixes for arm64.
>
> Could you please try the tree below?
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
>
> Let us know if the functionality is restored. Otherwise the issue will require
> further investigation.

Marek is already out for holidays, I gave your tree a try but kernel from
that branch was failing to boot on TM2(e).

Then I have cherry-picked 5 patches from the branch that seemed to
be missing in next-20190628:

28028f3174cf1 (HEAD) MAINTAINERS: Fix Andy's surname and the directory entries of VDSO
ec8f8e4bf2206 arm64: vdso: Fix compilation with clang older than 8
721882ebb5729 arm64: compat: Fix __arch_get_hw_counter() implementation
7027fea977a3d arm64: Fix __arch_get_hw_counter() implementation
10b305853fe22 lib/vdso: Make delta calculation work correctly
48568d8c7f479 (tag: next-20190628, linux-next/master) Add linux-next specific files for 20190628

With those 5 additional patches on top of next-20190628 the problem
is not observed any more. date, ping, etc. seems to be working well.

# date
Fri Jun 28 16:39:22 UTC 2019
#
# systemctl stop systemd-timesyncd
#
# date 062818392019
Fri Jun 28 18:39:00 UTC 2019
# date
Fri Jun 28 18:39:01 UTC 2019
#
# date 062818432019; date
Fri Jun 28 18:43:00 UTC 2019
Fri Jun 28 18:43:00 UTC 2019
# date
Fri Jun 28 18:43:04 UTC 2019

--
Regards,
Sylwester

2019-06-29 06:57:55

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Sylwester,

thank you for the quick turnaround on my email.

On 6/28/19 5:50 PM, Sylwester Nawrocki wrote:
> Hi Vincenzo,
>
> On 6/28/19 16:32, Vincenzo Frascino wrote:
>> On 6/28/19 2:09 PM, Marek Szyprowski wrote:
>>> On 2019-06-21 11:52, Vincenzo Frascino wrote:
>>>> To take advantage of the commonly defined vdso interface for
>>>> gettimeofday the architectural code requires an adaptation.
>>>>
>>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso.
>>>>
>>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME
>>>> and CLOCK_TAI.
>>>>
>>>> Cc: Catalin Marinas <[email protected]>
>>>> Cc: Will Deacon <[email protected]>
>>>> Signed-off-by: Vincenzo Frascino <[email protected]>
>>>> Tested-by: Shijith Thotton <[email protected]>
>>>> Tested-by: Andre Przywara <[email protected]>
>>>> Signed-off-by: Catalin Marinas <[email protected]>
>>>
>>> This patch causes serious regression on Samsung Exynos5433 SoC based
>>> TM2(e) boards. The time in userspace is always set to begin of the epoch:
>>>
>>> # date 062813152019
>>> Fri Jun 28 13:15:00 UTC 2019
>>> # date
>>> Thu Jan  1 00:00:00 UTC 1970
>>> # date
>>> Thu Jan  1 00:00:00 UTC 1970
>>>
>>> I've noticed that since the patch landed in Linux next-20190625 and
>>> bisect indeed pointed to this patch.
>>>
>> Thank you for reporting this, seems that the next that you posted is missing
>> some fixes for arm64.
>>
>> Could you please try the tree below?
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
>>
>> Let us know if the functionality is restored. Otherwise the issue will require
>> further investigation.
>
> Marek is already out for holidays, I gave your tree a try but kernel from
> that branch was failing to boot on TM2(e).
>
> Then I have cherry-picked 5 patches from the branch that seemed to
> be missing in next-20190628:
>
> 28028f3174cf1 (HEAD) MAINTAINERS: Fix Andy's surname and the directory entries of VDSO
> ec8f8e4bf2206 arm64: vdso: Fix compilation with clang older than 8
> 721882ebb5729 arm64: compat: Fix __arch_get_hw_counter() implementation
> 7027fea977a3d arm64: Fix __arch_get_hw_counter() implementation
> 10b305853fe22 lib/vdso: Make delta calculation work correctly
> 48568d8c7f479 (tag: next-20190628, linux-next/master) Add linux-next specific files for 20190628
>
> With those 5 additional patches on top of next-20190628 the problem
> is not observed any more. date, ping, etc. seems to be working well.
>
> # date
> Fri Jun 28 16:39:22 UTC 2019
> #
> # systemctl stop systemd-timesyncd
> #
> # date 062818392019
> Fri Jun 28 18:39:00 UTC 2019
> # date
> Fri Jun 28 18:39:01 UTC 2019
> #
> # date 062818432019; date
> Fri Jun 28 18:43:00 UTC 2019
> Fri Jun 28 18:43:00 UTC 2019
> # date
> Fri Jun 28 18:43:04 UTC 2019
>

This seems OK — thanks for spending some time testing our patches against your board.

If I may, I would like to ask you one favor: could you please keep an eye on
next and, once those patches are merged, repeat the test?

I just want to make sure that the regression does not reappear.

Have a nice weekend.

> --
> Regards,
> Sylwester
>

--
Regards,
Vincenzo

2019-07-08 17:50:48

by Sylwester Nawrocki

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Vincenzo,

On 6/29/19 08:58, Vincenzo Frascino wrote:
> If I may, I would like to ask to you one favor, could you please keep an eye on
> next and once those patches are merged repeat the test?
>
> I want just to make sure that the regression does not reappear.

My apologies, I forgot about this for a moment. I repeated the test with
next-20190705 tag and couldn't see any regressions.

--
Regards,
Sylwester

2019-07-08 17:51:38

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH v7 04/25] arm64: Substitute gettimeofday with C implementation

Hi Sylwester,

On 08/07/2019 13:57, Sylwester Nawrocki wrote:
> Hi Vincenzo,
>
> On 6/29/19 08:58, Vincenzo Frascino wrote:
>> If I may, I would like to ask to you one favor, could you please keep an eye on
>> next and once those patches are merged repeat the test?
>>
>> I want just to make sure that the regression does not reappear.
>
> My apologies, I forgot about this for a moment. I repeated the test with
> next-20190705 tag and couldn't see any regressions.
>

No problem and thank you for the confirmation.

--
Regards,
Vincenzo