This series adds support for the CLOCK_MONOTONIC_RAW clock in the x86
vDSO, thus decreasing its readout cost. This is particularly useful
for Wine which needs to implement Win32 API clock functions whose
value does not get adjusted with adjtimex().
Cc: Thomas Gleixner <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Huw Davies (3):
x86/vdso: Remove unused 'mask' member
x86/vdso: Allow clock specific mult and shift values
x86/vdso: Add support for CLOCK_MONOTONIC_RAW in the vDSO
arch/x86/entry/vdso/vclock_gettime.c | 4 ++--
arch/x86/entry/vsyscall/vsyscall_gtod.c | 15 ++++++++++++---
arch/x86/include/asm/vgtod.h | 10 +++++-----
3 files changed, 19 insertions(+), 10 deletions(-)
--
2.17.1
This will allow clocks with different mult and shift values,
e.g. CLOCK_MONOTONIC_RAW, to be supported in the vDSO.
The coarse clocks do not require these data so the values are not
copied for these clocks.
One could add potential new values of mult and shift alongside the
existing values in struct vsyscall_gtod_data, but it seems more
natural to group them with the actual clock data in the basetime array
at the expense of a few more cycles in update_vsyscall().
Cc: Thomas Gleixner <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Signed-off-by: Huw Davies <[email protected]>
---
arch/x86/entry/vdso/vclock_gettime.c | 4 ++--
arch/x86/entry/vsyscall/vsyscall_gtod.c | 8 ++++++--
arch/x86/include/asm/vgtod.h | 6 +++---
3 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 007b3fe9d727..a4199b846d77 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -153,8 +153,8 @@ notrace static int do_hres(clockid_t clk, struct timespec *ts)
if (unlikely((s64)cycles < 0))
return vdso_fallback_gettime(clk, ts);
if (cycles > last)
- ns += (cycles - last) * gtod->mult;
- ns >>= gtod->shift;
+ ns += (cycles - last) * base->mult;
+ ns >>= base->shift;
sec = base->sec;
} while (unlikely(gtod_read_retry(gtod, seq)));
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index e4ee83018279..ddc6a71df87c 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -43,16 +43,18 @@ void update_vsyscall(struct timekeeper *tk)
/* copy vsyscall data */
vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
- vdata->mult = tk->tkr_mono.mult;
- vdata->shift = tk->tkr_mono.shift;
base = &vdata->basetime[CLOCK_REALTIME];
base->sec = tk->xtime_sec;
base->nsec = tk->tkr_mono.xtime_nsec;
+ base->mult = tk->tkr_mono.mult;
+ base->shift = tk->tkr_mono.shift;
base = &vdata->basetime[CLOCK_TAI];
base->sec = tk->xtime_sec + (s64)tk->tai_offset;
base->nsec = tk->tkr_mono.xtime_nsec;
+ base->mult = tk->tkr_mono.mult;
+ base->shift = tk->tkr_mono.shift;
base = &vdata->basetime[CLOCK_MONOTONIC];
base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
@@ -63,6 +65,8 @@ void update_vsyscall(struct timekeeper *tk)
base->sec++;
}
base->nsec = nsec;
+ base->mult = tk->tkr_mono.mult;
+ base->shift = tk->tkr_mono.shift;
base = &vdata->basetime[CLOCK_REALTIME_COARSE];
base->sec = tk->xtime_sec;
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index daf69a25e46b..ae0d76491595 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -20,11 +20,13 @@ typedef unsigned long gtod_long_t;
* clocks, this encodes the actual time.
*
* To confuse the reader, for high-resolution clocks, nsec is left-shifted
- * by vsyscall_gtod_data.shift.
+ * by shift.
*/
struct vgtod_ts {
u64 sec;
u64 nsec;
+ u32 mult;
+ u32 shift;
};
#define VGTOD_BASES (CLOCK_TAI + 1)
@@ -40,8 +42,6 @@ struct vsyscall_gtod_data {
int vclock_mode;
u64 cycle_last;
- u32 mult;
- u32 shift;
struct vgtod_ts basetime[VGTOD_BASES];
--
2.17.1
The 'mask' member of struct vsyscall_gtod_data is unused, so remove
it. Its use was removed in commit a51e996d48ac (x86/vdso: Enforce
64bit clocksource).
Cc: Thomas Gleixner <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Signed-off-by: Huw Davies <[email protected]>
---
arch/x86/entry/vsyscall/vsyscall_gtod.c | 1 -
arch/x86/include/asm/vgtod.h | 1 -
2 files changed, 2 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index cfcdba082feb..e4ee83018279 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -43,7 +43,6 @@ void update_vsyscall(struct timekeeper *tk)
/* copy vsyscall data */
vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
- vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133f8e6f..daf69a25e46b 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -40,7 +40,6 @@ struct vsyscall_gtod_data {
int vclock_mode;
u64 cycle_last;
- u64 mask;
u32 mult;
u32 shift;
--
2.17.1
This is particularly useful for Wine which needs to implement Win32
API clock functions whose values do not get adjusted by adjtimex().
Cc: Thomas Gleixner <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Signed-off-by: Huw Davies <[email protected]>
---
arch/x86/entry/vsyscall/vsyscall_gtod.c | 6 ++++++
arch/x86/include/asm/vgtod.h | 3 ++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index ddc6a71df87c..fe3458caf301 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -68,6 +68,12 @@ void update_vsyscall(struct timekeeper *tk)
base->mult = tk->tkr_mono.mult;
base->shift = tk->tkr_mono.shift;
+ base = &vdata->basetime[CLOCK_MONOTONIC_RAW];
+ base->sec = tk->raw_sec;
+ base->nsec = tk->tkr_raw.xtime_nsec;
+ base->mult = tk->tkr_raw.mult;
+ base->shift = tk->tkr_raw.shift;
+
base = &vdata->basetime[CLOCK_REALTIME_COARSE];
base->sec = tk->xtime_sec;
base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index ae0d76491595..0b76ba884e25 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -30,7 +30,8 @@ struct vgtod_ts {
};
#define VGTOD_BASES (CLOCK_TAI + 1)
-#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
+#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | \
+ BIT(CLOCK_MONOTONIC_RAW) | BIT(CLOCK_TAI))
#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
/*
--
2.17.1
On Thu, 11 Apr 2019, Huw Davies wrote:
> The 'mask' member of struct vsyscall_gtod_data is unused, so remove
> it. Its use was removed in commit a51e996d48ac (x86/vdso: Enforce
> 64bit clocksource).
That's true, but we are moving the VDSO to generic code, so the mask will stay.
Thanks,
tglx
On Thu, 11 Apr 2019, Huw Davies wrote:
CC+: Vincenzo Frascino who is working on the generic VDSO.
> This will allow clocks with different mult and shift values,
> e.g. CLOCK_MONOTONIC_RAW, to be supported in the vDSO.
>
> The coarse clocks do not require these data so the values are not
> copied for these clocks.
>
> One could add potential new values of mult and shift alongside the
> existing values in struct vsyscall_gtod_data, but it seems more
> natural to group them with the actual clock data in the basetime array
> at the expense of a few more cycles in update_vsyscall().
The few cycles are one thing. Did you verify that this does not change the
cache layout for CLOCK_MONOTONIC and CLOCK_REALTIME? Let's look:
> struct vgtod_ts {
> u64 sec;
> u64 nsec;
> + u32 mult;
> + u32 shift;
> };
>
> #define VGTOD_BASES (CLOCK_TAI + 1)
> @@ -40,8 +42,6 @@ struct vsyscall_gtod_data {
>
> int vclock_mode;
> u64 cycle_last;
> - u32 mult;
> - u32 shift;
>
> struct vgtod_ts basetime[VGTOD_BASES];
The current state is:
struct vsyscall_gtod_data {
unsigned int seq; /* 0 4 */
int vclock_mode; /* 4 4 */
u64 cycle_last; /* 8 8 */
u64 mask; /* 16 8 */
u32 mult; /* 24 4 */
u32 shift; /* 28 4 */
struct vgtod_ts basetime[12]; /* 32 192 */
basetime[CLOCK_REALTIME] 32 .. 47
basetime[CLOCK_MONOTONIC] 48 .. 63
So with your change it looks like this:
struct vsyscall_gtod_data {
unsigned int seq; /* 0 4 */
int vclock_mode; /* 4 4 */
u64 cycle_last; /* 8 8 */
struct vgtod_ts basetime[12]; /* 16 288 */
which makes
basetime[CLOCK_REALTIME] 16 .. 39
basetime[CLOCK_MONOTONIC] 40 .. 63
So it stays in the same cache line, but as we move the VDSO to generic
code, the mask field needs to stay and this will make basetime[CLOCK_MONOTONIC]
overlap into the next cache line.
See https://lkml.kernel.org/r/[email protected]
for an alternate solution to this problem, which avoids this and just gives
CLOCK_MONOTONIC_RAW a separate storage space alltogether.
Thanks,
tglx
On Sun, Apr 14, 2019 at 12:53:32PM +0200, Thomas Gleixner wrote:
> So it stays in the same cache line, but as we move the VDSO to generic
> code, the mask field needs to stay and this will make basetime[CLOCK_MONOTONIC]
> overlap into the next cache line.
Thanks for the great review; this is a good point.
> See https://lkml.kernel.org/r/[email protected]
> for an alternate solution to this problem, which avoids this and just gives
> CLOCK_MONOTONIC_RAW a separate storage space alltogether.
I can certainly do this for the x86 vdso. Would that be useful or
should I wait for Vincenzo's work on the generic vdso first?
Many thanks,
Huw.
On Mon, 15 Apr 2019, Huw Davies wrote:
> On Sun, Apr 14, 2019 at 12:53:32PM +0200, Thomas Gleixner wrote:
> > So it stays in the same cache line, but as we move the VDSO to generic
> > code, the mask field needs to stay and this will make basetime[CLOCK_MONOTONIC]
> > overlap into the next cache line.
>
> Thanks for the great review; this is a good point.
>
> > See https://lkml.kernel.org/r/[email protected]
> > for an alternate solution to this problem, which avoids this and just gives
> > CLOCK_MONOTONIC_RAW a separate storage space alltogether.
>
> I can certainly do this for the x86 vdso. Would that be useful or
> should I wait for Vincenzo's work on the generic vdso first?
Depends. If Vincenzo comes along with his new version soon, then you might
get this for free :)
Vincenzo, what's the state of your work?
Thanks,
tglx
Hi Thomas,
On 15/04/2019 10:51, Thomas Gleixner wrote:
> On Mon, 15 Apr 2019, Huw Davies wrote:
>
>> On Sun, Apr 14, 2019 at 12:53:32PM +0200, Thomas Gleixner wrote:
>>> So it stays in the same cache line, but as we move the VDSO to generic
>>> code, the mask field needs to stay and this will make basetime[CLOCK_MONOTONIC]
>>> overlap into the next cache line.
>>
>> Thanks for the great review; this is a good point.
>>
>>> See https://lkml.kernel.org/r/[email protected]
>>> for an alternate solution to this problem, which avoids this and just gives
>>> CLOCK_MONOTONIC_RAW a separate storage space alltogether.
>>
>> I can certainly do this for the x86 vdso. Would that be useful or
>> should I wait for Vincenzo's work on the generic vdso first?
>
> Depends. If Vincenzo comes along with his new version soon, then you might
> get this for free :)
>
> Vincenzo, what's the state of your work?
>
I am mostly done with the development, the only thing missing is the integration
of the generic update_vsyscall. After this is complete, I will need to do some
testing and extract the performance numbers.
Considering that I will be on Easter holiday from this Wednesday till the end of
April, I think v6 will be ready around second week of May.
> Thanks,
>
> tglx
>
--
Regards,
Vincenzo
On Mon, Apr 15, 2019 at 11:15:56AM +0100, Vincenzo Frascino wrote:
> On 15/04/2019 10:51, Thomas Gleixner wrote:
> > On Mon, 15 Apr 2019, Huw Davies wrote:
> >> On Sun, Apr 14, 2019 at 12:53:32PM +0200, Thomas Gleixner wrote:
> >>> See https://lkml.kernel.org/r/[email protected]
> >>> for an alternate solution to this problem, which avoids this and just gives
> >>> CLOCK_MONOTONIC_RAW a separate storage space alltogether.
> >>
> >> I can certainly do this for the x86 vdso. Would that be useful or
> >> should I wait for Vincenzo's work on the generic vdso first?
> >
> > Depends. If Vincenzo comes along with his new version soon, then you might
> > get this for free :)
> >
> > Vincenzo, what's the state of your work?
> >
>
> I am mostly done with the development, the only thing missing is the integration
> of the generic update_vsyscall. After this is complete, I will need to do some
> testing and extract the performance numbers.
>
> Considering that I will be on Easter holiday from this Wednesday till the end of
> April, I think v6 will be ready around second week of May.
Hi Vincenzo,
Great! In which case there's not much point in me changing the x86
vdso---it'll just end up making more work for you.
Let me know if there's anything I can help with.
Huw.
Hi Huw,
On 15/04/2019 13:14, Huw Davies wrote:
> On Mon, Apr 15, 2019 at 11:15:56AM +0100, Vincenzo Frascino wrote:
>> On 15/04/2019 10:51, Thomas Gleixner wrote:
>>> On Mon, 15 Apr 2019, Huw Davies wrote:
>>>> On Sun, Apr 14, 2019 at 12:53:32PM +0200, Thomas Gleixner wrote:
>>>>> See https://lkml.kernel.org/r/[email protected]
>>>>> for an alternate solution to this problem, which avoids this and just gives
>>>>> CLOCK_MONOTONIC_RAW a separate storage space alltogether.
>>>>
>>>> I can certainly do this for the x86 vdso. Would that be useful or
>>>> should I wait for Vincenzo's work on the generic vdso first?
>>>
>>> Depends. If Vincenzo comes along with his new version soon, then you might
>>> get this for free :)
>>>
>>> Vincenzo, what's the state of your work?
>>>
>>
>> I am mostly done with the development, the only thing missing is the integration
>> of the generic update_vsyscall. After this is complete, I will need to do some
>> testing and extract the performance numbers.
>>
>> Considering that I will be on Easter holiday from this Wednesday till the end of
>> April, I think v6 will be ready around second week of May.
>
> Hi Vincenzo,
>
> Great! In which case there's not much point in me changing the x86
> vdso---it'll just end up making more work for you.
>
> Let me know if there's anything I can help with.
>
I wanted to let you know that I just posted my updated patchset for unifying
vdso. I should have tested it enough (I hope :) ), but if your help offer is
still valid, and you want to give it a go, you are more than welcome.
Please let me know if you find any issue.
> Huw.
>
--
Regards,
Vincenzo