2004-06-02 12:01:59

by Hu, Boris

[permalink] [raw]
Subject: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

<<posix-abs_timer-bugfix.diff>> George,

There is one bug of CLOCK_REALTIME timer reported by Adam at
http://sources.redhat.com/ml/libc-alpha/2004-05/msg00177.html.

Here is one possible bugfix and it is against linux-2.6.6. All
TIMER_ABSTIME cloks will be collected in k_clock struct and updated in
sys_clock_settime. Could you review it? Thanks.


diff -urN -X rt.ia32/base/dontdiff
linux-2.6.6/include/linux/posix-timers.h
linux-2.6.6.dev/include/linux/posix-timers.h
--- linux-2.6.6/include/linux/posix-timers.h 2004-05-10
10:32:29.000000000 +0800
+++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-02
10:30:57.000000000 +0800
@@ -1,9 +1,14 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H

+#include <linux/list.h>
+#include <linux/spinlock.h>
+
struct k_clock {
int res; /* in nano seconds */
- int (*clock_set) (struct timespec * tp);
+ struct list_head abs_timer_list;
+ spinlock_t abs_timer_lock;
+ int (*clock_set) (struct timespec * tp);
int (*clock_get) (struct timespec * tp);
int (*nsleep) (int flags,
struct timespec * new_setting,
diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/include/linux/timer.h
linux-2.6.6.dev/include/linux/timer.h
--- linux-2.6.6/include/linux/timer.h 2004-05-10 10:32:54.000000000
+0800
+++ linux-2.6.6.dev/include/linux/timer.h 2004-06-02
19:16:08.000000000 +0800
@@ -9,6 +9,7 @@

struct timer_list {
struct list_head entry;
+ struct list_head abs_timer_entry;
unsigned long expires;

spinlock_t lock;
diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/kernel/posix-timers.c
linux-2.6.6.dev/kernel/posix-timers.c
--- linux-2.6.6/kernel/posix-timers.c 2004-05-10 10:32:37.000000000
+0800
+++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-02
19:12:31.000000000 +0800
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista
Software.
*
+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ * Copyright (C) 2004 Boris Hu
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
(at
@@ -200,7 +203,9 @@
*/
static __init int init_posix_timers(void)
{
- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+ .abs_timer_lock = SPIN_LOCK_UNLOCKED

+ };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
@@ -388,6 +393,7 @@
return;
}
posix_clocks[clock_id] = *new_clock;
+ INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
}

static struct k_itimer * alloc_posix_timer(void)
@@ -843,8 +849,15 @@
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
{
if (timr->it_timer.expires == jiffies)
timer_notify_task(timr);
- else
+ else {
add_timer(&timr->it_timer);
+ if (flags & TIMER_ABSTIME) {
+ spin_lock(&clock->abs_timer_lock);
+
list_add_tail(&(timr->it_timer.abs_timer_entry),
+
&(clock->abs_timer_list));
+ spin_unlock(&clock->abs_timer_lock);
+ }
+ }
}
return 0;
}
@@ -1085,16 +1098,61 @@
sys_clock_settime(clockid_t which_clock, const struct timespec __user
*tp)
{
struct timespec new_tp;
+ struct timespec before, now;
+ struct k_clock *clock;
+ long ret;
+ struct timer_list *timer, *tmp;
+ struct timespec delta;
+ u64 exp = 0;

- if ((unsigned) which_clock >= MAX_CLOCKS ||
+ if ((unsigned) which_clock >= MAX_CLOCKS ||
!posix_clocks[which_clock].res)
return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+
+ clock = &posix_clocks[which_clock];
+
+ if (copy_from_user(&new_tp, tp, sizeof (*tp)))
return -EFAULT;
if (posix_clocks[which_clock].clock_set)
return posix_clocks[which_clock].clock_set(&new_tp);

- return do_sys_settimeofday(&new_tp, NULL);
+ do_posix_gettime(clock, &before);
+
+ ret = do_sys_settimeofday(&new_tp, NULL);
+
+ /*
+ * Check if there exist TIMER_ABSTIME timers to update.
+ */
+ if (which_clock != CLOCK_REALTIME ||
+ list_empty(&clock->abs_timer_list))
+ return ret;
+
+ do_posix_gettime(clock, &now);
+ delta.tv_sec = before.tv_sec - now.tv_sec;
+ delta.tv_nsec = before.tv_nsec - now.tv_nsec;
+ while (delta.tv_nsec >= NSEC_PER_SEC) {
+ delta.tv_sec ++;
+ delta.tv_nsec -= NSEC_PER_SEC;
+ }
+ while (delta.tv_nsec < 0) {
+ delta.tv_sec --;
+ delta.tv_nsec += NSEC_PER_SEC;
+ }
+
+ tstojiffie(&delta, clock->res, &exp);
+
+ spin_lock(&(clock->abs_timer_lock));
+ list_for_each_entry_safe(timer, tmp,
+ &clock->abs_timer_list,
+ abs_timer_entry) {
+ if (timer && del_timer(timer)) { //the timer has not
run
+ timer->expires += exp;
+ add_timer(timer);
+ } else
+ list_del(&timer->abs_timer_entry);
+ }
+ spin_unlock(&(clock->abs_timer_lock));
+ return ret;
}

asmlinkage long

Good Luck !
Boris Hu (Hu Jiangtao)
Intel China Software Center
86-021-5257-4545#1277
iNET: 8-752-1277
************************************
There are my thoughts, not my employer's.
************************************
"gpg --recv-keys --keyserver wwwkeys.pgp.net 0FD7685F"
{0FD7685F:CFD6 6F5C A2CB 7881 725B CEA0 956F 9F14 0FD7 685F}



Attachments:
posix-abs_timer-bugfix.diff (5.16 kB)
posix-abs_timer-bugfix.diff

2004-06-02 22:08:38

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Hu, Boris wrote:
> <<posix-abs_timer-bugfix.diff>> George,
>
> There is one bug of CLOCK_REALTIME timer reported by Adam at
> http://sources.redhat.com/ml/libc-alpha/2004-05/msg00177.html.
>
> Here is one possible bugfix and it is against linux-2.6.6. All
> TIMER_ABSTIME cloks will be collected in k_clock struct and updated in
> sys_clock_settime. Could you review it? Thanks.

Thanks for the poke :). Could you make the following changes:

First, put the list in the posix timer structure (k_itimer), not in timer_list.
This means one more dereference when doing things, but it does not push into
the timer_list structure which is mostly used for other things.

Second, I don't see the timer being removed from the list (should happen when
ever it is inactive). Timers that repeat should be out of the list while
waiting for the signal to be picked up and put back in when add_timer is again
called.

Also, you should test to see if the clock is one that can be set prior to
putting the timer in the abs timer list. We must not correct timers on
CLOCK_MONOTONIC.

Now, for the correction. Sys_clock_settime() is the wrong place for this as the
clock can also be set a number of other ways. The right place is in
clock_was_set(), which is called if time is set in any of the several ways. The
next thing is to determine how far the clock was moved. I think the best way to
do this is to keep a copy of the wall_to_monotonic var in a private location.
This should be set to be exactly wall_to_monotonic when the system is booted (in
the same function you are setting up the clock lists) and at the end of
clock_was_set(). When clock_was_set() is called the difference between this
value and wall_to_monotonic is exactly how far the clock was moved. (Be careful
on the sign of this movement.)

Finally, be careful about races. Timers can expire while clock_was_set() is
running. The removal code should take the timer lock as well as the abs_time
list lock (at least I think this would be wise, but I could be wrong here).

And a minor issue, the community seems to prefer the C comment style to the C++
style of comments...

Thanks for your effort in this matter.

George
>
>
> diff -urN -X rt.ia32/base/dontdiff
> linux-2.6.6/include/linux/posix-timers.h
> linux-2.6.6.dev/include/linux/posix-timers.h
> --- linux-2.6.6/include/linux/posix-timers.h 2004-05-10
> 10:32:29.000000000 +0800
> +++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-02
> 10:30:57.000000000 +0800
> @@ -1,9 +1,14 @@
> #ifndef _linux_POSIX_TIMERS_H
> #define _linux_POSIX_TIMERS_H
>
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +
> struct k_clock {
> int res; /* in nano seconds */
> - int (*clock_set) (struct timespec * tp);
> + struct list_head abs_timer_list;
> + spinlock_t abs_timer_lock;
> + int (*clock_set) (struct timespec * tp);
> int (*clock_get) (struct timespec * tp);
> int (*nsleep) (int flags,
> struct timespec * new_setting,
> diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/include/linux/timer.h
> linux-2.6.6.dev/include/linux/timer.h
> --- linux-2.6.6/include/linux/timer.h 2004-05-10 10:32:54.000000000
> +0800
> +++ linux-2.6.6.dev/include/linux/timer.h 2004-06-02
> 19:16:08.000000000 +0800
> @@ -9,6 +9,7 @@
>
> struct timer_list {
> struct list_head entry;
> + struct list_head abs_timer_entry;
> unsigned long expires;
>
> spinlock_t lock;
> diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/kernel/posix-timers.c
> linux-2.6.6.dev/kernel/posix-timers.c
> --- linux-2.6.6/kernel/posix-timers.c 2004-05-10 10:32:37.000000000
> +0800
> +++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-02
> 19:12:31.000000000 +0800
> @@ -7,6 +7,9 @@
> *
> * Copyright (C) 2002 2003 by MontaVista
> Software.
> *
> + * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
> + * Copyright (C) 2004 Boris Hu
> + *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> * the Free Software Foundation; either version 2 of the License, or
> (at
> @@ -200,7 +203,9 @@
> */
> static __init int init_posix_timers(void)
> {
> - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
> + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
> + .abs_timer_lock = SPIN_LOCK_UNLOCKED
>
> + };
> struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
> .clock_get = do_posix_clock_monotonic_gettime,
> .clock_set = do_posix_clock_monotonic_settime
> @@ -388,6 +393,7 @@
> return;
> }
> posix_clocks[clock_id] = *new_clock;
> + INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
> }
>
> static struct k_itimer * alloc_posix_timer(void)
> @@ -843,8 +849,15 @@
> if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
> {
> if (timr->it_timer.expires == jiffies)
> timer_notify_task(timr);
> - else
> + else {
> add_timer(&timr->it_timer);
> + if (flags & TIMER_ABSTIME) {
> + spin_lock(&clock->abs_timer_lock);
> +
> list_add_tail(&(timr->it_timer.abs_timer_entry),
> +
> &(clock->abs_timer_list));
> + spin_unlock(&clock->abs_timer_lock);
> + }
> + }
> }
> return 0;
> }
> @@ -1085,16 +1098,61 @@
> sys_clock_settime(clockid_t which_clock, const struct timespec __user
> *tp)
> {
> struct timespec new_tp;
> + struct timespec before, now;
> + struct k_clock *clock;
> + long ret;
> + struct timer_list *timer, *tmp;
> + struct timespec delta;
> + u64 exp = 0;
>
> - if ((unsigned) which_clock >= MAX_CLOCKS ||
> + if ((unsigned) which_clock >= MAX_CLOCKS ||
> !posix_clocks[which_clock].res)
> return -EINVAL;
> - if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> +
> + clock = &posix_clocks[which_clock];
> +
> + if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> return -EFAULT;
> if (posix_clocks[which_clock].clock_set)
> return posix_clocks[which_clock].clock_set(&new_tp);
>
> - return do_sys_settimeofday(&new_tp, NULL);
> + do_posix_gettime(clock, &before);
> +
> + ret = do_sys_settimeofday(&new_tp, NULL);
> +
> + /*
> + * Check if there exist TIMER_ABSTIME timers to update.
> + */
> + if (which_clock != CLOCK_REALTIME ||
> + list_empty(&clock->abs_timer_list))
> + return ret;
> +
> + do_posix_gettime(clock, &now);
> + delta.tv_sec = before.tv_sec - now.tv_sec;
> + delta.tv_nsec = before.tv_nsec - now.tv_nsec;
> + while (delta.tv_nsec >= NSEC_PER_SEC) {
> + delta.tv_sec ++;
> + delta.tv_nsec -= NSEC_PER_SEC;
> + }
> + while (delta.tv_nsec < 0) {
> + delta.tv_sec --;
> + delta.tv_nsec += NSEC_PER_SEC;
> + }
> +
> + tstojiffie(&delta, clock->res, &exp);
> +
> + spin_lock(&(clock->abs_timer_lock));
> + list_for_each_entry_safe(timer, tmp,
> + &clock->abs_timer_list,
> + abs_timer_entry) {
> + if (timer && del_timer(timer)) { //the timer has not
> run
> + timer->expires += exp;
> + add_timer(timer);
> + } else
> + list_del(&timer->abs_timer_entry);
> + }
> + spin_unlock(&(clock->abs_timer_lock));
> + return ret;
> }
>
> asmlinkage long
>
> Good Luck !
> Boris Hu (Hu Jiangtao)
> Intel China Software Center
> 86-021-5257-4545#1277
> iNET: 8-752-1277
> ************************************
> There are my thoughts, not my employer's.
> ************************************
> "gpg --recv-keys --keyserver wwwkeys.pgp.net 0FD7685F"
> {0FD7685F:CFD6 6F5C A2CB 7881 725B CEA0 956F 9F14 0FD7 685F}
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-03 09:57:30

by Hu, Boris

[permalink] [raw]
Subject: RE: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Thanks for your detailed comments. :)

>
> Hu, Boris wrote:
> > <<posix-abs_timer-bugfix.diff>> George,
> >
> > There is one bug of CLOCK_REALTIME timer reported by Adam at
> > http://sources.redhat.com/ml/libc-alpha/2004-05/msg00177.html.
> >
> > Here is one possible bugfix and it is against linux-2.6.6. All
> > TIMER_ABSTIME cloks will be collected in k_clock struct and updated
in
> > sys_clock_settime. Could you review it? Thanks.
>
> Thanks for the poke :). Could you make the following changes:
>
> First, put the list in the posix timer structure (k_itimer), not in
> timer_list.
> This means one more dereference when doing things, but it does not
push
> into
> the timer_list structure which is mostly used for other things.

Done.

>
> Second, I don't see the timer being removed from the list (should
happen
> when
> ever it is inactive). Timers that repeat should be out of the list
while
> waiting for the signal to be picked up and put back in when add_timer
is
> again
> called.

I tried to add the removed codes to set_timer_inactive() but it would
trigger a strange oops. I am still investigating on it. Is there any
recommending places except set_timer_inactive()?

>
> Also, you should test to see if the clock is one that can be set prior
to
> putting the timer in the abs timer list. We must not correct timers
on
> CLOCK_MONOTONIC.


Done.

>
> Now, for the correction. Sys_clock_settime() is the wrong place for
this
> as the
> clock can also be set a number of other ways. The right place is in
> clock_was_set(), which is called if time is set in any of the several
ways.
> The
> next thing is to determine how far the clock was moved. I think the
best
> way to
> do this is to keep a copy of the wall_to_monotonic var in a private
> location.
> This should be set to be exactly wall_to_monotonic when the system is
> booted (in
> the same function you are setting up the clock lists) and at the end
of
> clock_was_set(). When clock_was_set() is called the difference
between
> this
> value and wall_to_monotonic is exactly how far the clock was moved.
(Be
> careful
> on the sign of this movement.)
>

Done.

> Finally, be careful about races. Timers can expire while
clock_was_set()
> is
> running. The removal code should take the timer lock as well as the
> abs_time
> list lock (at least I think this would be wise, but I could be wrong
here).
>

IMHO, we need not take the timer locks. We del_timer() first and if the
timer has expired, we simply removed it from the abs_timer_list which is
protected by abs_timer_lock.

> And a minor issue, the community seems to prefer the C comment style
to
> the C++
> style of comments...
>

Done.

> Thanks for your effort in this matter.
>
> George
> >
> >
> > diff -urN -X rt.ia32/base/dontdiff
> > linux-2.6.6/include/linux/posix-timers.h
> > linux-2.6.6.dev/include/linux/posix-timers.h
> > --- linux-2.6.6/include/linux/posix-timers.h 2004-05-10
> > 10:32:29.000000000 +0800
> > +++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-02
> > 10:30:57.000000000 +0800
> > @@ -1,9 +1,14 @@
> > #ifndef _linux_POSIX_TIMERS_H
> > #define _linux_POSIX_TIMERS_H
> >
> > +#include <linux/list.h>
> > +#include <linux/spinlock.h>
> > +
> > struct k_clock {
> > int res; /* in nano seconds */
> > - int (*clock_set) (struct timespec * tp);
> > + struct list_head abs_timer_list;
> > + spinlock_t abs_timer_lock;
> > + int (*clock_set) (struct timespec * tp);
> > int (*clock_get) (struct timespec * tp);
> > int (*nsleep) (int flags,
> > struct timespec * new_setting,
> > diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/include/linux/timer.h
> > linux-2.6.6.dev/include/linux/timer.h
> > --- linux-2.6.6/include/linux/timer.h 2004-05-10
10:32:54.000000000
> > +0800
> > +++ linux-2.6.6.dev/include/linux/timer.h 2004-06-02
> > 19:16:08.000000000 +0800
> > @@ -9,6 +9,7 @@
> >
> > struct timer_list {
> > struct list_head entry;
> > + struct list_head abs_timer_entry;
> > unsigned long expires;
> >
> > spinlock_t lock;
> > diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/kernel/posix-timers.c
> > linux-2.6.6.dev/kernel/posix-timers.c
> > --- linux-2.6.6/kernel/posix-timers.c 2004-05-10
10:32:37.000000000
> > +0800
> > +++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-02
> > 19:12:31.000000000 +0800
> > @@ -7,6 +7,9 @@
> > *
> > * Copyright (C) 2002 2003 by MontaVista
> > Software.
> > *
> > + * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
> > + * Copyright (C) 2004 Boris Hu
> > + *
> > * This program is free software; you can redistribute it and/or
modify
> > * it under the terms of the GNU General Public License as
published by
> > * the Free Software Foundation; either version 2 of the License,
or
> > (at
> > @@ -200,7 +203,9 @@
> > */
> > static __init int init_posix_timers(void)
> > {
> > - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
> > + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
> > + .abs_timer_lock = SPIN_LOCK_UNLOCKED
> >
> > + };
> > struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
> > .clock_get = do_posix_clock_monotonic_gettime,
> > .clock_set = do_posix_clock_monotonic_settime
> > @@ -388,6 +393,7 @@
> > return;
> > }
> > posix_clocks[clock_id] = *new_clock;
> > + INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
> > }
> >
> > static struct k_itimer * alloc_posix_timer(void)
> > @@ -843,8 +849,15 @@
> > if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
> > {
> > if (timr->it_timer.expires == jiffies)
> > timer_notify_task(timr);
> > - else
> > + else {
> > add_timer(&timr->it_timer);
> > + if (flags & TIMER_ABSTIME) {
> > + spin_lock(&clock->abs_timer_lock);
> > +
> > list_add_tail(&(timr->it_timer.abs_timer_entry),
> > +
> > &(clock->abs_timer_list));
> > +
spin_unlock(&clock->abs_timer_lock);
> > + }
> > + }
> > }
> > return 0;
> > }
> > @@ -1085,16 +1098,61 @@
> > sys_clock_settime(clockid_t which_clock, const struct timespec
__user
> > *tp)
> > {
> > struct timespec new_tp;
> > + struct timespec before, now;
> > + struct k_clock *clock;
> > + long ret;
> > + struct timer_list *timer, *tmp;
> > + struct timespec delta;
> > + u64 exp = 0;
> >
> > - if ((unsigned) which_clock >= MAX_CLOCKS ||
> > + if ((unsigned) which_clock >= MAX_CLOCKS ||
> > !posix_clocks[which_clock].res)
> > return -EINVAL;
> > - if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> > +
> > + clock = &posix_clocks[which_clock];
> > +
> > + if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> > return -EFAULT;
> > if (posix_clocks[which_clock].clock_set)
> > return posix_clocks[which_clock].clock_set(&new_tp);
> >
> > - return do_sys_settimeofday(&new_tp, NULL);
> > + do_posix_gettime(clock, &before);
> > +
> > + ret = do_sys_settimeofday(&new_tp, NULL);
> > +
> > + /*
> > + * Check if there exist TIMER_ABSTIME timers to update.
> > + */
> > + if (which_clock != CLOCK_REALTIME ||
> > + list_empty(&clock->abs_timer_list))
> > + return ret;
> > +
> > + do_posix_gettime(clock, &now);
> > + delta.tv_sec = before.tv_sec - now.tv_sec;
> > + delta.tv_nsec = before.tv_nsec - now.tv_nsec;
> > + while (delta.tv_nsec >= NSEC_PER_SEC) {
> > + delta.tv_sec ++;
> > + delta.tv_nsec -= NSEC_PER_SEC;
> > + }
> > + while (delta.tv_nsec < 0) {
> > + delta.tv_sec --;
> > + delta.tv_nsec += NSEC_PER_SEC;
> > + }
> > +
> > + tstojiffie(&delta, clock->res, &exp);
> > +
> > + spin_lock(&(clock->abs_timer_lock));
> > + list_for_each_entry_safe(timer, tmp,
> > + &clock->abs_timer_list,
> > + abs_timer_entry) {
> > + if (timer && del_timer(timer)) { //the timer has
not
> > run
> > + timer->expires += exp;
> > + add_timer(timer);
> > + } else
> > + list_del(&timer->abs_timer_entry);
> > + }
> > + spin_unlock(&(clock->abs_timer_lock));
> > + return ret;
> > }
> >
> > asmlinkage long
> >
> > Good Luck !
> > Boris Hu (Hu Jiangtao)
> > Intel China Software Center
> > 86-021-5257-4545#1277
> > iNET: 8-752-1277
> > ************************************
> > There are my thoughts, not my employer's.
> > ************************************
> > "gpg --recv-keys --keyserver wwwkeys.pgp.net 0FD7685F"
> > {0FD7685F:CFD6 6F5C A2CB 7881 725B CEA0 956F 9F14 0FD7 685F}
> >
> >
>
> --
> George Anzinger [email protected]
> High-res-timers: http://sourceforge.net/projects/high-res-timers/
> Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml


2004-06-04 00:37:33

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Hu, Boris wrote:
> Thanks for your detailed comments. :)
>
>
>>Hu, Boris wrote:
>>
>>> <<posix-abs_timer-bugfix.diff>> George,
>>>
>>>There is one bug of CLOCK_REALTIME timer reported by Adam at
>>>http://sources.redhat.com/ml/libc-alpha/2004-05/msg00177.html.
>>>
>>>Here is one possible bugfix and it is against linux-2.6.6. All
>>>TIMER_ABSTIME cloks will be collected in k_clock struct and updated
>
> in
>
>>>sys_clock_settime. Could you review it? Thanks.
>>
>>Thanks for the poke :). Could you make the following changes:
>>
>>First, put the list in the posix timer structure (k_itimer), not in
>>timer_list.
>> This means one more dereference when doing things, but it does not
>
> push
>
>>into
>>the timer_list structure which is mostly used for other things.
>
>
> Done.
>
>
>>Second, I don't see the timer being removed from the list (should
>
> happen
>
>>when
>>ever it is inactive). Timers that repeat should be out of the list
>
> while
>
>>waiting for the signal to be picked up and put back in when add_timer
>
> is
>
>>again
>>called.
>
>
> I tried to add the removed codes to set_timer_inactive() but it would
> trigger a strange oops. I am still investigating on it. Is there any
> recommending places except set_timer_inactive()?

Hm, set_timer_inactive() is called from the timer create routine. Should not
need to remove it here... Also, this function is used for SMP issues and, in
some cases (do_timer_settime is one) it is not called if not on an SMP system.
Also, it seems not to be called from sys_timer_delete(). This last would be a
real problem as we are about to return the memory the list runs through it.

So, I think you will just have to find the places were we delete a timer, that
and the timer call back function should do it.

>
>
>>Also, you should test to see if the clock is one that can be set prior
>
> to
>
>>putting the timer in the abs timer list. We must not correct timers
>
> on
>
>>CLOCK_MONOTONIC.
>
>
>
> Done.
>
>
>>Now, for the correction. Sys_clock_settime() is the wrong place for
>
> this
>
>>as the
>>clock can also be set a number of other ways. The right place is in
>>clock_was_set(), which is called if time is set in any of the several
>
> ways.
>
>>The
>>next thing is to determine how far the clock was moved. I think the
>
> best
>
>>way to
>>do this is to keep a copy of the wall_to_monotonic var in a private
>>location.
>>This should be set to be exactly wall_to_monotonic when the system is
>>booted (in
>>the same function you are setting up the clock lists) and at the end
>
> of
>
>>clock_was_set(). When clock_was_set() is called the difference
>
> between
>
>>this
>>value and wall_to_monotonic is exactly how far the clock was moved.
>
> (Be
>
>>careful
>>on the sign of this movement.)
>>
>
>
> Done.
>
>
>>Finally, be careful about races. Timers can expire while
>
> clock_was_set()
>
>>is
>>running. The removal code should take the timer lock as well as the
>>abs_time
>>list lock (at least I think this would be wise, but I could be wrong
>
> here).
>
>
> IMHO, we need not take the timer locks. We del_timer() first and if the
> timer has expired, we simply removed it from the abs_timer_list which is
> protected by abs_timer_lock.

Me thinks you will need to do a bit more to convince me that locks are not
needed here. Lets see if I can explain my concerns.

First, I think the clock_was_set() function needs to serialize it self so only
one cpu/ task can be in it at a time. This, I think, can/is done with the abs
list lock.

Second there is the possibility that a timer in the list will already be set via
the correct time. To avoid this possibility I suggest (this is a change from my
suggestion of yesterday) putting the current value of wall_to_monotonic in the
k_timer structure when the timer is calculated. This value must be obtained
under the xtime read lock (which we already take to calculate the timer). In
this way of doing things, clock_was_set() would take the xtime write lock,
possibly for each entry in the abs list. It would use the wall_to_monotonic
time in the structure rather than keeping a local copy, and it would update that
time once the correction was made.

We still haven't covered the case where time is set while a timer is being set.
I.e. where the expire time is calculated but the result has not yet been put
in the timer structure and add_timer has not yet been called. It is here that
the timer lock would seem to be the right thing to do. We would require that
the timer be put in the abs list while the xtime read lock is held (careful here
as this is now a sequence lock and we only want to add the timer to the list
once). This is complicated. We must take the locks in the same order so we can
not take the timer lock while holding the abs list lock. The, IMHO, simple
thing to do is to have clock_was_set() copy the whole abs list (this is just a
simple pointer manipulation). Then it can scan this list and move each entry to
the abs list as it updates the timers. The timer lock would be taken for each
timer during this update. This is to allow timers that are on the way to
add_timer to get there. This code should not remove timers from the abs list,
or rather, each timer it finds in the moved list should be put back in the abs
list even if it is not in the system timer list (it just means that someone else
is removing it). Both the removal from the moved list and the insert into the
abs list should be done under the same abs list lock but it must be dropped
while taking the timer lock.

There is a possible race here with the timer delete code. Here is how I would
solve this. First, with the list being moved, you need only be concerned with
the first entry in the list (as you will remove it as part of processing and
then do the next first entry). So, first lock the abs list. Then find the
timer ID for the first entry. Unlock the abs list, and lock the timer using the
ID. The existing lock code will take care of possible races WRT existence.
Once the timer is locked, re lock the abs list and if the given timer is still
the first entry, a.) remove it b.) if the system delete timer fails, just take
the abs list lock and reinsert the timer, else under the xtime readlock compare
wall_to_monotonic with the timers value c.) and put it in the abs list and the
add_timer list.

Note that the case of an expired timer that is still in the abs list is handled
by just reinserting the timer. This means that the expire call back code needs
to check for a clock reset that may have made the expire invalid.

I am sure there are other ways of doing this, but the main locking issues are:

1.) Getting the timer's value pegged to a given clock set value (done by copying
wall_to_monotonic to the k_timer struct)., This must be done under the xtime
read lock.

2.) Covering the race between the timer adjustment code and possible POSIX timer
deletion (done by NOT assuming the timer is there just because we found it in
the abs timer list, although we do pin it down long enough to get it's ID by
locking this list). This also requires us to take the timer lock which means we
have to drop the abs list lock.

3.) Covering the race between the timer expiring during the system clock setting
processing. Done by having the timer call back code verify that the same value
of wall_to_monotonic is still in play.

We note that the time used for NOW in the repeating timer update needs to also
satisfy 1. above.

And in passing, note that the overrun count will show something going on when
the clock is moved forward and not when it is move backward.

>
>
>>And a minor issue, the community seems to prefer the C comment style
>
> to
>
>>the C++
>>style of comments...
>>
>
>
> Done.
>
>
>>Thanks for your effort in this matter.
>>
>>George
>>
>>>
>>>diff -urN -X rt.ia32/base/dontdiff
>>>linux-2.6.6/include/linux/posix-timers.h
>>>linux-2.6.6.dev/include/linux/posix-timers.h
>>>--- linux-2.6.6/include/linux/posix-timers.h 2004-05-10
>>>10:32:29.000000000 +0800
>>>+++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-02
>>>10:30:57.000000000 +0800
>>>@@ -1,9 +1,14 @@
>>> #ifndef _linux_POSIX_TIMERS_H
>>> #define _linux_POSIX_TIMERS_H
>>>
>>>+#include <linux/list.h>
>>>+#include <linux/spinlock.h>
>>>+
>>> struct k_clock {
>>> int res; /* in nano seconds */
>>>- int (*clock_set) (struct timespec * tp);
>>>+ struct list_head abs_timer_list;
>>>+ spinlock_t abs_timer_lock;
>>>+ int (*clock_set) (struct timespec * tp);
>>> int (*clock_get) (struct timespec * tp);
>>> int (*nsleep) (int flags,
>>> struct timespec * new_setting,
>>>diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/include/linux/timer.h
>>>linux-2.6.6.dev/include/linux/timer.h
>>>--- linux-2.6.6/include/linux/timer.h 2004-05-10
>
> 10:32:54.000000000
>
>>>+0800
>>>+++ linux-2.6.6.dev/include/linux/timer.h 2004-06-02
>>>19:16:08.000000000 +0800
>>>@@ -9,6 +9,7 @@
>>>
>>> struct timer_list {
>>> struct list_head entry;
>>>+ struct list_head abs_timer_entry;
>>> unsigned long expires;
>>>
>>> spinlock_t lock;
>>>diff -urN -X rt.ia32/base/dontdiff linux-2.6.6/kernel/posix-timers.c
>>>linux-2.6.6.dev/kernel/posix-timers.c
>>>--- linux-2.6.6/kernel/posix-timers.c 2004-05-10
>
> 10:32:37.000000000
>
>>>+0800
>>>+++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-02
>>>19:12:31.000000000 +0800
>>>@@ -7,6 +7,9 @@
>>> *
>>> * Copyright (C) 2002 2003 by MontaVista
>>>Software.
>>> *
>>>+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
>>>+ * Copyright (C) 2004 Boris Hu
>>>+ *
>>> * This program is free software; you can redistribute it and/or
>
> modify
>
>>> * it under the terms of the GNU General Public License as
>
> published by
>
>>> * the Free Software Foundation; either version 2 of the License,
>
> or
>
>>>(at
>>>@@ -200,7 +203,9 @@
>>> */
>>> static __init int init_posix_timers(void)
>>> {
>>>- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
>>>+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
>>>+ .abs_timer_lock = SPIN_LOCK_UNLOCKED
>>>
>>>+ };
>>> struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
>>> .clock_get = do_posix_clock_monotonic_gettime,
>>> .clock_set = do_posix_clock_monotonic_settime
>>>@@ -388,6 +393,7 @@
>>> return;
>>> }
>>> posix_clocks[clock_id] = *new_clock;
>>>+ INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
>>> }
>>>
>>> static struct k_itimer * alloc_posix_timer(void)
>>>@@ -843,8 +849,15 @@
>>> if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
>>>{
>>> if (timr->it_timer.expires == jiffies)
>>> timer_notify_task(timr);
>>>- else
>>>+ else {
>>> add_timer(&timr->it_timer);
>>>+ if (flags & TIMER_ABSTIME) {
>>>+ spin_lock(&clock->abs_timer_lock);
>>>+
>>>list_add_tail(&(timr->it_timer.abs_timer_entry),
>>>+
>>>&(clock->abs_timer_list));
>>>+
>
> spin_unlock(&clock->abs_timer_lock);
>
>>>+ }
>>>+ }
>>> }
>>> return 0;
>>> }
>>>@@ -1085,16 +1098,61 @@
>>> sys_clock_settime(clockid_t which_clock, const struct timespec
>
> __user
>
>>>*tp)
>>> {
>>> struct timespec new_tp;
>>>+ struct timespec before, now;
>>>+ struct k_clock *clock;
>>>+ long ret;
>>>+ struct timer_list *timer, *tmp;
>>>+ struct timespec delta;
>>>+ u64 exp = 0;
>>>
>>>- if ((unsigned) which_clock >= MAX_CLOCKS ||
>>>+ if ((unsigned) which_clock >= MAX_CLOCKS ||
>>> !posix_clocks[which_clock].res)
>>> return -EINVAL;
>>>- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
>>>+
>>>+ clock = &posix_clocks[which_clock];
>>>+
>>>+ if (copy_from_user(&new_tp, tp, sizeof (*tp)))
>>> return -EFAULT;
>>> if (posix_clocks[which_clock].clock_set)
>>> return posix_clocks[which_clock].clock_set(&new_tp);
>>>
>>>- return do_sys_settimeofday(&new_tp, NULL);
>>>+ do_posix_gettime(clock, &before);
>>>+
>>>+ ret = do_sys_settimeofday(&new_tp, NULL);
>>>+
>>>+ /*
>>>+ * Check if there exist TIMER_ABSTIME timers to update.
>>>+ */
>>>+ if (which_clock != CLOCK_REALTIME ||
>>>+ list_empty(&clock->abs_timer_list))
>>>+ return ret;
>>>+
>>>+ do_posix_gettime(clock, &now);
>>>+ delta.tv_sec = before.tv_sec - now.tv_sec;
>>>+ delta.tv_nsec = before.tv_nsec - now.tv_nsec;
>>>+ while (delta.tv_nsec >= NSEC_PER_SEC) {
>>>+ delta.tv_sec ++;
>>>+ delta.tv_nsec -= NSEC_PER_SEC;
>>>+ }
>>>+ while (delta.tv_nsec < 0) {
>>>+ delta.tv_sec --;
>>>+ delta.tv_nsec += NSEC_PER_SEC;
>>>+ }
>>>+
>>>+ tstojiffie(&delta, clock->res, &exp);
>>>+
>>>+ spin_lock(&(clock->abs_timer_lock));
>>>+ list_for_each_entry_safe(timer, tmp,
>>>+ &clock->abs_timer_list,
>>>+ abs_timer_entry) {
>>>+ if (timer && del_timer(timer)) { //the timer has
>
> not
>
>>>run
>>>+ timer->expires += exp;
>>>+ add_timer(timer);
>>>+ } else
>>>+ list_del(&timer->abs_timer_entry);
>>>+ }
>>>+ spin_unlock(&(clock->abs_timer_lock));
>>>+ return ret;
>>> }
>>>
>>> asmlinkage long
>>>
>>>Good Luck !
>>>Boris Hu (Hu Jiangtao)
>>>Intel China Software Center
>>>86-021-5257-4545#1277
>>>iNET: 8-752-1277
>>>************************************
>>>There are my thoughts, not my employer's.
>>>************************************
>>>"gpg --recv-keys --keyserver wwwkeys.pgp.net 0FD7685F"
>>>{0FD7685F:CFD6 6F5C A2CB 7881 725B CEA0 956F 9F14 0FD7 685F}
>>>
>>>
>>
>>--
>>George Anzinger [email protected]
>>High-res-timers: http://sourceforge.net/projects/high-res-timers/
>>Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml
>
>
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-04 09:22:25

by Hu, Boris

[permalink] [raw]
Subject: RE: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Here is one update version. wall_to_monotonic copy has been moved to
k_itimer. Thanks.

diff -urN -X dontdiff linux-2.6.6/include/linux/posix-timers.h
linux-2.6.6.dev/include/linux/posix-timers.h
--- linux-2.6.6/include/linux/posix-timers.h 2004-06-04
15:48:33.000000000 +0800
+++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-04
10:40:10.000000000 +0800
@@ -1,9 +1,14 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H

+#include <linux/spinlock.h>
+#include <linux/list.h>
+
struct k_clock {
int res; /* in nano seconds */
- int (*clock_set) (struct timespec * tp);
+ struct list_head abs_timer_list;
+ spinlock_t abs_timer_lock;
+ int (*clock_set) (struct timespec * tp);
int (*clock_get) (struct timespec * tp);
int (*nsleep) (int flags,
struct timespec * new_setting,
diff -urN -X dontdiff linux-2.6.6/include/linux/sched.h
linux-2.6.6.dev/include/linux/sched.h
--- linux-2.6.6/include/linux/sched.h 2004-06-04 15:48:33.000000000
+0800
+++ linux-2.6.6.dev/include/linux/sched.h 2004-06-04
10:39:53.000000000 +0800
@@ -342,6 +342,8 @@
struct task_struct *it_process; /* process to send signal to */
struct timer_list it_timer;
struct sigqueue *sigq; /* signal queue entry. */
+ struct list_head abs_timer_entry; /* clock abs_timer_list */
+ struct timespec wall_to_monotonic_prev;
};


diff -urN -X dontdiff linux-2.6.6/kernel/posix-timers.c
linux-2.6.6.dev/kernel/posix-timers.c
--- linux-2.6.6/kernel/posix-timers.c 2004-06-04 15:48:33.000000000
+0800
+++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-04
15:48:20.000000000 +0800
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista
Software.
*
+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ * Copyright (C) 2004 Boris Hu
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
(at
@@ -95,7 +98,7 @@
# define set_timer_inactive(tmr) \
do { \
(tmr)->it_timer.entry.prev = (void
*)TIMER_INACTIVE; \
- } while (0)
+ } while (0)
#else
# define timer_active(tmr) BARFY // error to use outside of SMP
# define set_timer_inactive(tmr) do { } while (0)
@@ -200,7 +203,9 @@
*/
static __init int init_posix_timers(void)
{
- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+ .abs_timer_lock = SPIN_LOCK_UNLOCKED

+ };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
@@ -212,7 +217,6 @@
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0,
0, 0);
idr_init(&posix_timers_id);
-
return 0;
}

@@ -360,6 +364,11 @@
set_timer_inactive(timr);
timer_notify_task(timr);
unlock_timer(timr, flags);
+ if (CLOCK_REALTIME == timr->it_clock) {
+
spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ list_del_init(&timr->abs_timer_entry);
+
spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ }
}


@@ -388,6 +397,7 @@
return;
}
posix_clocks[clock_id] = *new_clock;
+ INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
}

static struct k_itimer * alloc_posix_timer(void)
@@ -402,6 +412,7 @@
kmem_cache_free(posix_timers_cache, tmr);
tmr = 0;
}
+ INIT_LIST_HEAD(&tmr->abs_timer_entry);
return tmr;
}

@@ -787,6 +798,7 @@
{
struct k_clock *clock = &posix_clocks[timr->it_clock];
u64 expire_64;
+ unsigned long seq;

if (old_setting)
do_timer_gettime(timr, old_setting);
@@ -813,6 +825,11 @@
#else
del_timer(&timr->it_timer);
#endif
+ if (CLOCK_REALTIME == timr->it_clock) {
+ spin_lock(&clock->abs_timer_lock);
+ list_del_init(&timr->abs_timer_entry);
+ spin_unlock(&clock->abs_timer_lock);
+ }
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -834,7 +851,6 @@
tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
timr->it_incr = (unsigned long)expire_64;

-
/*
* For some reason the timer does not fire immediately if
expires is
* equal to jiffies, so the timer notify function is called
directly.
@@ -843,8 +859,21 @@
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
{
if (timr->it_timer.expires == jiffies)
timer_notify_task(timr);
- else
+ else {
add_timer(&timr->it_timer);
+ if (flags & TIMER_ABSTIME &&
+ timr->it_clock == CLOCK_REALTIME) {
+ do {
+ seq =
read_seqbegin(&xtime_lock);
+ timr->wall_to_monotonic_prev =
+ wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock,
seq));
+ spin_lock(&clock->abs_timer_lock);
+ list_add_tail(&(timr->abs_timer_entry),
+
&(clock->abs_timer_list));
+ spin_unlock(&clock->abs_timer_lock);
+ }
+ }
}
return 0;
}
@@ -875,10 +904,10 @@
if (!timr)
return -EINVAL;

- if (!posix_clocks[timr->it_clock].timer_set)
+ if (!posix_clocks[timr->it_clock].timer_set)
error = do_timer_settime(timr, flags, &new_spec, rtn);
else
- error = posix_clocks[timr->it_clock].timer_set(timr,
+ error = posix_clocks[timr->it_clock].timer_set(timr,
flags,

&new_spec, rtn);
unlock_timer(timr, flag);
@@ -911,6 +940,11 @@
#else
del_timer(&timer->it_timer);
#endif
+ if (CLOCK_REALTIME == timer->it_clock) {
+
spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ list_del_init(&timer->abs_timer_entry);
+
spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ }
return 0;
}

@@ -1086,10 +1120,10 @@
{
struct timespec new_tp;

- if ((unsigned) which_clock >= MAX_CLOCKS ||
+ if ((unsigned) which_clock >= MAX_CLOCKS ||
!posix_clocks[which_clock].res)
return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+ if (copy_from_user(&new_tp, tp, sizeof (*tp)))
return -EFAULT;
if (posix_clocks[which_clock].clock_set)
return posix_clocks[which_clock].clock_set(&new_tp);
@@ -1159,7 +1193,55 @@

void clock_was_set(void)
{
+ struct k_clock *clock = &posix_clocks[CLOCK_REALTIME];
+ struct k_itimer *timr, *tmp;
+ struct timespec delta;
+ u64 exp = 0;
+ unsigned long seq;
+
wake_up_all(&nanosleep_abs_wqueue);
+
+ /*
+ * Check if there exist TIMER_ABSTIME timers to correct.
+ */
+ if (list_empty(&clock->abs_timer_list))
+ return;
+
+ spin_lock(&clock->abs_timer_lock);
+ list_for_each_entry_safe(timr, tmp,
+ &clock->abs_timer_list,
+ abs_timer_entry) {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ delta.tv_sec =
+ wall_to_monotonic.tv_sec
+ - timr->wall_to_monotonic_prev.tv_sec;
+ delta.tv_nsec =
+ wall_to_monotonic.tv_nsec
+ - timr->wall_to_monotonic_prev.tv_nsec;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ while (delta.tv_nsec >= NSEC_PER_SEC) {
+ delta.tv_sec ++;
+ delta.tv_nsec -= NSEC_PER_SEC;
+ }
+ while (delta.tv_nsec < 0) {
+ delta.tv_sec --;
+ delta.tv_nsec += NSEC_PER_SEC;
+ }
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ timr->wall_to_monotonic_prev =
wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ tstojiffie(&delta, clock->res, &exp);
+ if (del_timer(&timr->it_timer)) { /* the timer has not
run */
+ timr->it_timer.expires += exp;
+ add_timer(&timr->it_timer);
+ } else
+ list_del_init(&timr->abs_timer_entry);
+ }
+ spin_unlock(&clock->abs_timer_lock);
}

long clock_nanosleep_restart(struct restart_block *restart_block);

>
> Hu, Boris wrote:
> > Thanks for your detailed comments. :)
> >
> >
> >>Hu, Boris wrote:
> >>
> >>> <<posix-abs_timer-bugfix.diff>> George,
> >>>
> >>>There is one bug of CLOCK_REALTIME timer reported by Adam at
> >>>http://sources.redhat.com/ml/libc-alpha/2004-05/msg00177.html.
> >>>
> >>>Here is one possible bugfix and it is against linux-2.6.6. All
> >>>TIMER_ABSTIME cloks will be collected in k_clock struct and updated
> >
> > in
> >
> >>>sys_clock_settime. Could you review it? Thanks.
> >>
> >>Thanks for the poke :). Could you make the following changes:
> >>
> >>First, put the list in the posix timer structure (k_itimer), not in
> >>timer_list.
> >> This means one more dereference when doing things, but it does not
> >
> > push
> >
> >>into
> >>the timer_list structure which is mostly used for other things.
> >
> >
> > Done.
> >
> >
> >>Second, I don't see the timer being removed from the list (should
> >
> > happen
> >
> >>when
> >>ever it is inactive). Timers that repeat should be out of the list
> >
> > while
> >
> >>waiting for the signal to be picked up and put back in when
add_timer
> >
> > is
> >
> >>again
> >>called.
> >
> >
> > I tried to add the removed codes to set_timer_inactive() but it
would
> > trigger a strange oops. I am still investigating on it. Is there any
> > recommending places except set_timer_inactive()?
>
> Hm, set_timer_inactive() is called from the timer create routine.
Should
> not
> need to remove it here... Also, this function is used for SMP issues
and,
> in
> some cases (do_timer_settime is one) it is not called if not on an SMP
> system.
> Also, it seems not to be called from sys_timer_delete(). This last
would
> be a
> real problem as we are about to return the memory the list runs
through it.
>
> So, I think you will just have to find the places were we delete a
timer,
> that
> and the timer call back function should do it.
>
> >
> >
> >>Also, you should test to see if the clock is one that can be set
prior
> >
> > to
> >
> >>putting the timer in the abs timer list. We must not correct timers
> >
> > on
> >
> >>CLOCK_MONOTONIC.
> >
> >
> >
> > Done.
> >
> >
> >>Now, for the correction. Sys_clock_settime() is the wrong place for
> >
> > this
> >
> >>as the
> >>clock can also be set a number of other ways. The right place is in
> >>clock_was_set(), which is called if time is set in any of the
several
> >
> > ways.
> >
> >>The
> >>next thing is to determine how far the clock was moved. I think the
> >
> > best
> >
> >>way to
> >>do this is to keep a copy of the wall_to_monotonic var in a private
> >>location.
> >>This should be set to be exactly wall_to_monotonic when the system
is
> >>booted (in
> >>the same function you are setting up the clock lists) and at the end
> >
> > of
> >
> >>clock_was_set(). When clock_was_set() is called the difference
> >
> > between
> >
> >>this
> >>value and wall_to_monotonic is exactly how far the clock was moved.
> >
> > (Be
> >
> >>careful
> >>on the sign of this movement.)
> >>
> >
> >
> > Done.
> >
> >
> >>Finally, be careful about races. Timers can expire while
> >
> > clock_was_set()
> >
> >>is
> >>running. The removal code should take the timer lock as well as the
> >>abs_time
> >>list lock (at least I think this would be wise, but I could be wrong
> >
> > here).
> >
> >
> > IMHO, we need not take the timer locks. We del_timer() first and if
the
> > timer has expired, we simply removed it from the abs_timer_list
which is
> > protected by abs_timer_lock.
>
> Me thinks you will need to do a bit more to convince me that locks are
not
> needed here. Lets see if I can explain my concerns.
>
> First, I think the clock_was_set() function needs to serialize it self
so
> only
> one cpu/ task can be in it at a time. This, I think, can/is done with
the
> abs
> list lock.
>
> Second there is the possibility that a timer in the list will already
be
> set via
> the correct time. To avoid this possibility I suggest (this is a
change
> from my
> suggestion of yesterday) putting the current value of
wall_to_monotonic in
> the
> k_timer structure when the timer is calculated. This value must be
> obtained
> under the xtime read lock (which we already take to calculate the
timer).
> In
> this way of doing things, clock_was_set() would take the xtime write
lock,
> possibly for each entry in the abs list. It would use the
> wall_to_monotonic
> time in the structure rather than keeping a local copy, and it would
> update that
> time once the correction was made.
>
> We still haven't covered the case where time is set while a timer is
being
> set.
> I.e. where the expire time is calculated but the result has not yet
been
> put
> in the timer structure and add_timer has not yet been called. It is
here
> that
> the timer lock would seem to be the right thing to do. We would
require
> that
> the timer be put in the abs list while the xtime read lock is held
> (careful here
> as this is now a sequence lock and we only want to add the timer to
the
> list
> once). This is complicated. We must take the locks in the same order
so
> we can
> not take the timer lock while holding the abs list lock. The, IMHO,
> simple
> thing to do is to have clock_was_set() copy the whole abs list (this
is
> just a
> simple pointer manipulation). Then it can scan this list and move
each
> entry to
> the abs list as it updates the timers. The timer lock would be taken
for
> each
> timer during this update. This is to allow timers that are on the way
to
> add_timer to get there. This code should not remove timers from the
abs
> list,
> or rather, each timer it finds in the moved list should be put back in
the
> abs
> list even if it is not in the system timer list (it just means that
> someone else
> is removing it). Both the removal from the moved list and the insert
into
> the
> abs list should be done under the same abs list lock but it must be
> dropped
> while taking the timer lock.
>
> There is a possible race here with the timer delete code. Here is how
I
> would
> solve this. First, with the list being moved, you need only be
concerned
> with
> the first entry in the list (as you will remove it as part of
processing
> and
> then do the next first entry). So, first lock the abs list. Then
find
> the
> timer ID for the first entry. Unlock the abs list, and lock the timer
> using the
> ID. The existing lock code will take care of possible races WRT
existence.
> Once the timer is locked, re lock the abs list and if the given timer
is
> still
> the first entry, a.) remove it b.) if the system delete timer fails,
just
> take
> the abs list lock and reinsert the timer, else under the xtime
readlock
> compare
> wall_to_monotonic with the timers value c.) and put it in the abs list
and
> the
> add_timer list.
>
> Note that the case of an expired timer that is still in the abs list
is
> handled
> by just reinserting the timer. This means that the expire call back
code
> needs
> to check for a clock reset that may have made the expire invalid.
>
> I am sure there are other ways of doing this, but the main locking
issues
> are:
>
> 1.) Getting the timer's value pegged to a given clock set value (done
by
> copying
> wall_to_monotonic to the k_timer struct)., This must be done under
the
> xtime
> read lock.
>
> 2.) Covering the race between the timer adjustment code and possible
POSIX
> timer
> deletion (done by NOT assuming the timer is there just because we
found it
> in
> the abs timer list, although we do pin it down long enough to get it's
ID
> by
> locking this list). This also requires us to take the timer lock
which
> means we
> have to drop the abs list lock.
>
> 3.) Covering the race between the timer expiring during the system
clock
> setting
> processing. Done by having the timer call back code verify that the
same
> value
> of wall_to_monotonic is still in play.
>
> We note that the time used for NOW in the repeating timer update needs
to
> also
> satisfy 1. above.
>
> And in passing, note that the overrun count will show something going
on
> when
> the clock is moved forward and not when it is move backward.
>
> >
> >
> >>And a minor issue, the community seems to prefer the C comment style
> >
> > to
> >
> >>the C++
> >>style of comments...
> >>
> >
> >
> > Done.
> >
> >
> >>Thanks for your effort in this matter.
> >>
> >>George
> >>
> >>>
> >>>diff -urN -X rt.ia32/base/dontdiff
> >>>linux-2.6.6/include/linux/posix-timers.h
> >>>linux-2.6.6.dev/include/linux/posix-timers.h
> >>>--- linux-2.6.6/include/linux/posix-timers.h 2004-05-10
> >>>10:32:29.000000000 +0800
> >>>+++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-02
> >>>10:30:57.000000000 +0800
> >>>@@ -1,9 +1,14 @@
> >>> #ifndef _linux_POSIX_TIMERS_H
> >>> #define _linux_POSIX_TIMERS_H
> >>>
> >>>+#include <linux/list.h>
> >>>+#include <linux/spinlock.h>
> >>>+
> >>> struct k_clock {
> >>> int res; /* in nano seconds */
> >>>- int (*clock_set) (struct timespec * tp);
> >>>+ struct list_head abs_timer_list;
> >>>+ spinlock_t abs_timer_lock;
> >>>+ int (*clock_set) (struct timespec * tp);
> >>> int (*clock_get) (struct timespec * tp);
> >>> int (*nsleep) (int flags,
> >>> struct timespec * new_setting,
> >>>diff -urN -X rt.ia32/base/dontdiff
linux-2.6.6/include/linux/timer.h
> >>>linux-2.6.6.dev/include/linux/timer.h
> >>>--- linux-2.6.6/include/linux/timer.h 2004-05-10
> >
> > 10:32:54.000000000
> >
> >>>+0800
> >>>+++ linux-2.6.6.dev/include/linux/timer.h 2004-06-02
> >>>19:16:08.000000000 +0800
> >>>@@ -9,6 +9,7 @@
> >>>
> >>> struct timer_list {
> >>> struct list_head entry;
> >>>+ struct list_head abs_timer_entry;
> >>> unsigned long expires;
> >>>
> >>> spinlock_t lock;
> >>>diff -urN -X rt.ia32/base/dontdiff
linux-2.6.6/kernel/posix-timers.c
> >>>linux-2.6.6.dev/kernel/posix-timers.c
> >>>--- linux-2.6.6/kernel/posix-timers.c 2004-05-10
> >
> > 10:32:37.000000000
> >
> >>>+0800
> >>>+++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-02
> >>>19:12:31.000000000 +0800
> >>>@@ -7,6 +7,9 @@
> >>> *
> >>> * Copyright (C) 2002 2003 by
MontaVista
> >>>Software.
> >>> *
> >>>+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
> >>>+ * Copyright (C) 2004 Boris Hu
> >>>+ *
> >>> * This program is free software; you can redistribute it and/or
> >
> > modify
> >
> >>> * it under the terms of the GNU General Public License as
> >
> > published by
> >
> >>> * the Free Software Foundation; either version 2 of the License,
> >
> > or
> >
> >>>(at
> >>>@@ -200,7 +203,9 @@
> >>> */
> >>> static __init int init_posix_timers(void)
> >>> {
> >>>- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
> >>>+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
> >>>+ .abs_timer_lock = SPIN_LOCK_UNLOCKED
> >>>
> >>>+ };
> >>> struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
> >>> .clock_get = do_posix_clock_monotonic_gettime,
> >>> .clock_set = do_posix_clock_monotonic_settime
> >>>@@ -388,6 +393,7 @@
> >>> return;
> >>> }
> >>> posix_clocks[clock_id] = *new_clock;
> >>>+ INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
> >>> }
> >>>
> >>> static struct k_itimer * alloc_posix_timer(void)
> >>>@@ -843,8 +849,15 @@
> >>> if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
> >>>{
> >>> if (timr->it_timer.expires == jiffies)
> >>> timer_notify_task(timr);
> >>>- else
> >>>+ else {
> >>> add_timer(&timr->it_timer);
> >>>+ if (flags & TIMER_ABSTIME) {
> >>>+ spin_lock(&clock->abs_timer_lock);
> >>>+
> >>>list_add_tail(&(timr->it_timer.abs_timer_entry),
> >>>+
> >>>&(clock->abs_timer_list));
> >>>+
> >
> > spin_unlock(&clock->abs_timer_lock);
> >
> >>>+ }
> >>>+ }
> >>> }
> >>> return 0;
> >>> }
> >>>@@ -1085,16 +1098,61 @@
> >>> sys_clock_settime(clockid_t which_clock, const struct timespec
> >
> > __user
> >
> >>>*tp)
> >>> {
> >>> struct timespec new_tp;
> >>>+ struct timespec before, now;
> >>>+ struct k_clock *clock;
> >>>+ long ret;
> >>>+ struct timer_list *timer, *tmp;
> >>>+ struct timespec delta;
> >>>+ u64 exp = 0;
> >>>
> >>>- if ((unsigned) which_clock >= MAX_CLOCKS ||
> >>>+ if ((unsigned) which_clock >= MAX_CLOCKS ||
> >>> !posix_clocks[which_clock].res)
> >>> return -EINVAL;
> >>>- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> >>>+
> >>>+ clock = &posix_clocks[which_clock];
> >>>+
> >>>+ if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> >>> return -EFAULT;
> >>> if (posix_clocks[which_clock].clock_set)
> >>> return posix_clocks[which_clock].clock_set(&new_tp);
> >>>
> >>>- return do_sys_settimeofday(&new_tp, NULL);
> >>>+ do_posix_gettime(clock, &before);
> >>>+
> >>>+ ret = do_sys_settimeofday(&new_tp, NULL);
> >>>+
> >>>+ /*
> >>>+ * Check if there exist TIMER_ABSTIME timers to update.
> >>>+ */
> >>>+ if (which_clock != CLOCK_REALTIME ||
> >>>+ list_empty(&clock->abs_timer_list))
> >>>+ return ret;
> >>>+
> >>>+ do_posix_gettime(clock, &now);
> >>>+ delta.tv_sec = before.tv_sec - now.tv_sec;
> >>>+ delta.tv_nsec = before.tv_nsec - now.tv_nsec;
> >>>+ while (delta.tv_nsec >= NSEC_PER_SEC) {
> >>>+ delta.tv_sec ++;
> >>>+ delta.tv_nsec -= NSEC_PER_SEC;
> >>>+ }
> >>>+ while (delta.tv_nsec < 0) {
> >>>+ delta.tv_sec --;
> >>>+ delta.tv_nsec += NSEC_PER_SEC;
> >>>+ }
> >>>+
> >>>+ tstojiffie(&delta, clock->res, &exp);
> >>>+
> >>>+ spin_lock(&(clock->abs_timer_lock));
> >>>+ list_for_each_entry_safe(timer, tmp,
> >>>+ &clock->abs_timer_list,
> >>>+ abs_timer_entry) {
> >>>+ if (timer && del_timer(timer)) { //the timer has
> >
> > not
> >
> >>>run
> >>>+ timer->expires += exp;
> >>>+ add_timer(timer);
> >>>+ } else
> >>>+ list_del(&timer->abs_timer_entry);
> >>>+ }
> >>>+ spin_unlock(&(clock->abs_timer_lock));
> >>>+ return ret;
> >>> }
> >>>
> >>> asmlinkage long
> >>>
> >>>Good Luck !
> >>>Boris Hu (Hu Jiangtao)
> >>>Intel China Software Center
> >>>86-021-5257-4545#1277
> >>>iNET: 8-752-1277
> >>>************************************
> >>>There are my thoughts, not my employer's.
> >>>************************************
> >>>"gpg --recv-keys --keyserver wwwkeys.pgp.net 0FD7685F"
> >>>{0FD7685F:CFD6 6F5C A2CB 7881 725B CEA0 956F 9F14 0FD7 685F}
> >>>
> >>>
> >>
> >>--
> >>George Anzinger [email protected]
> >>High-res-timers: http://sourceforge.net/projects/high-res-timers/
> >>Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml
> >
> >
> >
> >
>
> --
> George Anzinger [email protected]
> High-res-timers: http://sourceforge.net/projects/high-res-timers/
> Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml



Attachments:
posix-abs_timer-bugfix-0.2.diff (8.72 kB)
posix-abs_timer-bugfix-0.2.diff

2004-06-04 17:28:35

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Hu, Boris wrote:
> Here is one update version. wall_to_monotonic copy has been moved to
> k_itimer. Thanks.

As far as it goes... the update of the k_timer wall_to_monotonic should be the
same as that which is used to do the correction. I.e. it should be done within
the same lock region.

I will be out of town till Tuesday or Wed. next week....

George
>
> diff -urN -X dontdiff linux-2.6.6/include/linux/posix-timers.h
> linux-2.6.6.dev/include/linux/posix-timers.h
> --- linux-2.6.6/include/linux/posix-timers.h 2004-06-04
> 15:48:33.000000000 +0800
> +++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-04
> 10:40:10.000000000 +0800
> @@ -1,9 +1,14 @@
> #ifndef _linux_POSIX_TIMERS_H
> #define _linux_POSIX_TIMERS_H
>
> +#include <linux/spinlock.h>
> +#include <linux/list.h>
> +
> struct k_clock {
> int res; /* in nano seconds */
> - int (*clock_set) (struct timespec * tp);
> + struct list_head abs_timer_list;
> + spinlock_t abs_timer_lock;
> + int (*clock_set) (struct timespec * tp);
> int (*clock_get) (struct timespec * tp);
> int (*nsleep) (int flags,
> struct timespec * new_setting,
> diff -urN -X dontdiff linux-2.6.6/include/linux/sched.h
> linux-2.6.6.dev/include/linux/sched.h
> --- linux-2.6.6/include/linux/sched.h 2004-06-04 15:48:33.000000000
> +0800
> +++ linux-2.6.6.dev/include/linux/sched.h 2004-06-04
> 10:39:53.000000000 +0800
> @@ -342,6 +342,8 @@
> struct task_struct *it_process; /* process to send signal to */
> struct timer_list it_timer;
> struct sigqueue *sigq; /* signal queue entry. */
> + struct list_head abs_timer_entry; /* clock abs_timer_list */
> + struct timespec wall_to_monotonic_prev;
> };
>
>
> diff -urN -X dontdiff linux-2.6.6/kernel/posix-timers.c
> linux-2.6.6.dev/kernel/posix-timers.c
> --- linux-2.6.6/kernel/posix-timers.c 2004-06-04 15:48:33.000000000
> +0800
> +++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-04
> 15:48:20.000000000 +0800
> @@ -7,6 +7,9 @@
> *
> * Copyright (C) 2002 2003 by MontaVista
> Software.
> *
> + * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
> + * Copyright (C) 2004 Boris Hu
> + *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> * the Free Software Foundation; either version 2 of the License, or
> (at
> @@ -95,7 +98,7 @@
> # define set_timer_inactive(tmr) \
> do { \
> (tmr)->it_timer.entry.prev = (void
> *)TIMER_INACTIVE; \
> - } while (0)
> + } while (0)
> #else
> # define timer_active(tmr) BARFY // error to use outside of SMP
> # define set_timer_inactive(tmr) do { } while (0)
> @@ -200,7 +203,9 @@
> */
> static __init int init_posix_timers(void)
> {
> - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
> + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
> + .abs_timer_lock = SPIN_LOCK_UNLOCKED
>
> + };
> struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
> .clock_get = do_posix_clock_monotonic_gettime,
> .clock_set = do_posix_clock_monotonic_settime
> @@ -212,7 +217,6 @@
> posix_timers_cache = kmem_cache_create("posix_timers_cache",
> sizeof (struct k_itimer), 0, 0,
> 0, 0);
> idr_init(&posix_timers_id);
> -
> return 0;
> }
>
> @@ -360,6 +364,11 @@
> set_timer_inactive(timr);
> timer_notify_task(timr);
> unlock_timer(timr, flags);
> + if (CLOCK_REALTIME == timr->it_clock) {
> +
> spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + list_del_init(&timr->abs_timer_entry);
> +
> spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + }
> }
>
>
> @@ -388,6 +397,7 @@
> return;
> }
> posix_clocks[clock_id] = *new_clock;
> + INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
> }
>
> static struct k_itimer * alloc_posix_timer(void)
> @@ -402,6 +412,7 @@
> kmem_cache_free(posix_timers_cache, tmr);
> tmr = 0;
> }
> + INIT_LIST_HEAD(&tmr->abs_timer_entry);
> return tmr;
> }
>
> @@ -787,6 +798,7 @@
> {
> struct k_clock *clock = &posix_clocks[timr->it_clock];
> u64 expire_64;
> + unsigned long seq;
>
> if (old_setting)
> do_timer_gettime(timr, old_setting);
> @@ -813,6 +825,11 @@
> #else
> del_timer(&timr->it_timer);
> #endif
> + if (CLOCK_REALTIME == timr->it_clock) {
> + spin_lock(&clock->abs_timer_lock);
> + list_del_init(&timr->abs_timer_entry);
> + spin_unlock(&clock->abs_timer_lock);
> + }
> timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
> ~REQUEUE_PENDING;
> timr->it_overrun_last = 0;
> @@ -834,7 +851,6 @@
> tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
> timr->it_incr = (unsigned long)expire_64;
>
> -
> /*
> * For some reason the timer does not fire immediately if
> expires is
> * equal to jiffies, so the timer notify function is called
> directly.
> @@ -843,8 +859,21 @@
> if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
> {
> if (timr->it_timer.expires == jiffies)
> timer_notify_task(timr);
> - else
> + else {
> add_timer(&timr->it_timer);
> + if (flags & TIMER_ABSTIME &&
> + timr->it_clock == CLOCK_REALTIME) {
> + do {
> + seq =
> read_seqbegin(&xtime_lock);
> + timr->wall_to_monotonic_prev =
> + wall_to_monotonic;
> + } while (read_seqretry(&xtime_lock,
> seq));
> + spin_lock(&clock->abs_timer_lock);
> + list_add_tail(&(timr->abs_timer_entry),
> +
> &(clock->abs_timer_list));
> + spin_unlock(&clock->abs_timer_lock);
> + }
> + }
> }
> return 0;
> }
> @@ -875,10 +904,10 @@
> if (!timr)
> return -EINVAL;
>
> - if (!posix_clocks[timr->it_clock].timer_set)
> + if (!posix_clocks[timr->it_clock].timer_set)
> error = do_timer_settime(timr, flags, &new_spec, rtn);
> else
> - error = posix_clocks[timr->it_clock].timer_set(timr,
> + error = posix_clocks[timr->it_clock].timer_set(timr,
> flags,
>
> &new_spec, rtn);
> unlock_timer(timr, flag);
> @@ -911,6 +940,11 @@
> #else
> del_timer(&timer->it_timer);
> #endif
> + if (CLOCK_REALTIME == timer->it_clock) {
> +
> spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + list_del_init(&timer->abs_timer_entry);
> +
> spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + }
> return 0;
> }
>
> @@ -1086,10 +1120,10 @@
> {
> struct timespec new_tp;
>
> - if ((unsigned) which_clock >= MAX_CLOCKS ||
> + if ((unsigned) which_clock >= MAX_CLOCKS ||
> !posix_clocks[which_clock].res)
> return -EINVAL;
> - if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> + if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> return -EFAULT;
> if (posix_clocks[which_clock].clock_set)
> return posix_clocks[which_clock].clock_set(&new_tp);
> @@ -1159,7 +1193,55 @@
>
> void clock_was_set(void)
> {
> + struct k_clock *clock = &posix_clocks[CLOCK_REALTIME];
> + struct k_itimer *timr, *tmp;
> + struct timespec delta;
> + u64 exp = 0;
> + unsigned long seq;
> +
> wake_up_all(&nanosleep_abs_wqueue);
> +
> + /*
> + * Check if there exist TIMER_ABSTIME timers to correct.
> + */
> + if (list_empty(&clock->abs_timer_list))
> + return;
> +
> + spin_lock(&clock->abs_timer_lock);
> + list_for_each_entry_safe(timr, tmp,
> + &clock->abs_timer_list,
> + abs_timer_entry) {
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + delta.tv_sec =
> + wall_to_monotonic.tv_sec
> + - timr->wall_to_monotonic_prev.tv_sec;
> + delta.tv_nsec =
> + wall_to_monotonic.tv_nsec
> + - timr->wall_to_monotonic_prev.tv_nsec;
> + } while (read_seqretry(&xtime_lock, seq));
> +
> + while (delta.tv_nsec >= NSEC_PER_SEC) {
> + delta.tv_sec ++;
> + delta.tv_nsec -= NSEC_PER_SEC;
> + }
> + while (delta.tv_nsec < 0) {
> + delta.tv_sec --;
> + delta.tv_nsec += NSEC_PER_SEC;
> + }
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + timr->wall_to_monotonic_prev =
> wall_to_monotonic;
> + } while (read_seqretry(&xtime_lock, seq));
> +
> + tstojiffie(&delta, clock->res, &exp);
> + if (del_timer(&timr->it_timer)) { /* the timer has not
> run */
> + timr->it_timer.expires += exp;
> + add_timer(&timr->it_timer);
> + } else
> + list_del_init(&timr->abs_timer_entry);
> + }
> + spin_unlock(&clock->abs_timer_lock);
> }
>
> long clock_nanosleep_restart(struct restart_block *restart_block);
>
>
>>Hu, Boris wrote:
>>
>>>Thanks for your detailed comments. :)
>>>
>>>
>>>
>>>>Hu, Boris wrote:
>>>>
>>>>
>>>>><<posix-abs_timer-bugfix.diff>> George,
>>>>>
>>>>>There is one bug of CLOCK_REALTIME timer reported by Adam at
>>>>>http://sources.redhat.com/ml/libc-alpha/2004-05/msg00177.html.
>>>>>
>>>>>Here is one possible bugfix and it is against linux-2.6.6. All
>>>>>TIMER_ABSTIME cloks will be collected in k_clock struct and updated
>>>
>>>in
>>>
>>>
>>>>>sys_clock_settime. Could you review it? Thanks.
>>>>
>>>>Thanks for the poke :). Could you make the following changes:
>>>>
>>>>First, put the list in the posix timer structure (k_itimer), not in
>>>>timer_list.
>>>> This means one more dereference when doing things, but it does not
>>>
>>>push
>>>
>>>
>>>>into
>>>>the timer_list structure which is mostly used for other things.
>>>
>>>
>>>Done.
>>>
>>>
>>>
>>>>Second, I don't see the timer being removed from the list (should
>>>
>>>happen
>>>
>>>
>>>>when
>>>>ever it is inactive). Timers that repeat should be out of the list
>>>
>>>while
>>>
>>>
>>>>waiting for the signal to be picked up and put back in when
>
> add_timer
>
>>>is
>>>
>>>
>>>>again
>>>>called.
>>>
>>>
>>>I tried to add the removed codes to set_timer_inactive() but it
>
> would
>
>>>trigger a strange oops. I am still investigating on it. Is there any
>>>recommending places except set_timer_inactive()?
>>
>>Hm, set_timer_inactive() is called from the timer create routine.
>
> Should
>
>>not
>>need to remove it here... Also, this function is used for SMP issues
>
> and,
>
>>in
>>some cases (do_timer_settime is one) it is not called if not on an SMP
>>system.
>>Also, it seems not to be called from sys_timer_delete(). This last
>
> would
>
>>be a
>>real problem as we are about to return the memory the list runs
>
> through it.
>
>>So, I think you will just have to find the places were we delete a
>
> timer,
>
>>that
>>and the timer call back function should do it.
>>
>>
>>>
>>>>Also, you should test to see if the clock is one that can be set
>
> prior
>
>>>to
>>>
>>>
>>>>putting the timer in the abs timer list. We must not correct timers
>>>
>>>on
>>>
>>>
>>>>CLOCK_MONOTONIC.
>>>
>>>
>>>
>>>Done.
>>>
>>>
>>>
>>>>Now, for the correction. Sys_clock_settime() is the wrong place for
>>>
>>>this
>>>
>>>
>>>>as the
>>>>clock can also be set a number of other ways. The right place is in
>>>>clock_was_set(), which is called if time is set in any of the
>
> several
>
>>>ways.
>>>
>>>
>>>>The
>>>>next thing is to determine how far the clock was moved. I think the
>>>
>>>best
>>>
>>>
>>>>way to
>>>>do this is to keep a copy of the wall_to_monotonic var in a private
>>>>location.
>>>>This should be set to be exactly wall_to_monotonic when the system
>
> is
>
>>>>booted (in
>>>>the same function you are setting up the clock lists) and at the end
>>>
>>>of
>>>
>>>
>>>>clock_was_set(). When clock_was_set() is called the difference
>>>
>>>between
>>>
>>>
>>>>this
>>>>value and wall_to_monotonic is exactly how far the clock was moved.
>>>
>>>(Be
>>>
>>>
>>>>careful
>>>>on the sign of this movement.)
>>>>
>>>
>>>
>>>Done.
>>>
>>>
>>>
>>>>Finally, be careful about races. Timers can expire while
>>>
>>>clock_was_set()
>>>
>>>
>>>>is
>>>>running. The removal code should take the timer lock as well as the
>>>>abs_time
>>>>list lock (at least I think this would be wise, but I could be wrong
>>>
>>>here).
>>>
>>>
>>>IMHO, we need not take the timer locks. We del_timer() first and if
>
> the
>
>>>timer has expired, we simply removed it from the abs_timer_list
>
> which is
>
>>>protected by abs_timer_lock.
>>
>>Me thinks you will need to do a bit more to convince me that locks are
>
> not
>
>>needed here. Lets see if I can explain my concerns.
>>
>>First, I think the clock_was_set() function needs to serialize it self
>
> so
>
>>only
>>one cpu/ task can be in it at a time. This, I think, can/is done with
>
> the
>
>>abs
>>list lock.
>>
>>Second there is the possibility that a timer in the list will already
>
> be
>
>>set via
>>the correct time. To avoid this possibility I suggest (this is a
>
> change
>
>>from my
>>suggestion of yesterday) putting the current value of
>
> wall_to_monotonic in
>
>>the
>>k_timer structure when the timer is calculated. This value must be
>>obtained
>>under the xtime read lock (which we already take to calculate the
>
> timer).
>
>>In
>>this way of doing things, clock_was_set() would take the xtime write
>
> lock,
>
>>possibly for each entry in the abs list. It would use the
>>wall_to_monotonic
>>time in the structure rather than keeping a local copy, and it would
>>update that
>>time once the correction was made.
>>
>>We still haven't covered the case where time is set while a timer is
>
> being
>
>>set.
>> I.e. where the expire time is calculated but the result has not yet
>
> been
>
>>put
>>in the timer structure and add_timer has not yet been called. It is
>
> here
>
>>that
>>the timer lock would seem to be the right thing to do. We would
>
> require
>
>>that
>>the timer be put in the abs list while the xtime read lock is held
>>(careful here
>>as this is now a sequence lock and we only want to add the timer to
>
> the
>
>>list
>>once). This is complicated. We must take the locks in the same order
>
> so
>
>>we can
>>not take the timer lock while holding the abs list lock. The, IMHO,
>>simple
>>thing to do is to have clock_was_set() copy the whole abs list (this
>
> is
>
>>just a
>>simple pointer manipulation). Then it can scan this list and move
>
> each
>
>>entry to
>>the abs list as it updates the timers. The timer lock would be taken
>
> for
>
>>each
>>timer during this update. This is to allow timers that are on the way
>
> to
>
>>add_timer to get there. This code should not remove timers from the
>
> abs
>
>>list,
>>or rather, each timer it finds in the moved list should be put back in
>
> the
>
>>abs
>>list even if it is not in the system timer list (it just means that
>>someone else
>>is removing it). Both the removal from the moved list and the insert
>
> into
>
>>the
>>abs list should be done under the same abs list lock but it must be
>>dropped
>>while taking the timer lock.
>>
>>There is a possible race here with the timer delete code. Here is how
>
> I
>
>>would
>>solve this. First, with the list being moved, you need only be
>
> concerned
>
>>with
>>the first entry in the list (as you will remove it as part of
>
> processing
>
>>and
>>then do the next first entry). So, first lock the abs list. Then
>
> find
>
>>the
>>timer ID for the first entry. Unlock the abs list, and lock the timer
>>using the
>>ID. The existing lock code will take care of possible races WRT
>
> existence.
>
>>Once the timer is locked, re lock the abs list and if the given timer
>
> is
>
>>still
>>the first entry, a.) remove it b.) if the system delete timer fails,
>
> just
>
>>take
>>the abs list lock and reinsert the timer, else under the xtime
>
> readlock
>
>>compare
>>wall_to_monotonic with the timers value c.) and put it in the abs list
>
> and
>
>>the
>>add_timer list.
>>
>>Note that the case of an expired timer that is still in the abs list
>
> is
>
>>handled
>>by just reinserting the timer. This means that the expire call back
>
> code
>
>>needs
>>to check for a clock reset that may have made the expire invalid.
>>
>>I am sure there are other ways of doing this, but the main locking
>
> issues
>
>>are:
>>
>>1.) Getting the timer's value pegged to a given clock set value (done
>
> by
>
>>copying
>>wall_to_monotonic to the k_timer struct)., This must be done under
>
> the
>
>>xtime
>>read lock.
>>
>>2.) Covering the race between the timer adjustment code and possible
>
> POSIX
>
>>timer
>>deletion (done by NOT assuming the timer is there just because we
>
> found it
>
>>in
>>the abs timer list, although we do pin it down long enough to get it's
>
> ID
>
>>by
>>locking this list). This also requires us to take the timer lock
>
> which
>
>>means we
>>have to drop the abs list lock.
>>
>>3.) Covering the race between the timer expiring during the system
>
> clock
>
>>setting
>>processing. Done by having the timer call back code verify that the
>
> same
>
>>value
>>of wall_to_monotonic is still in play.
>>
>>We note that the time used for NOW in the repeating timer update needs
>
> to
>
>>also
>>satisfy 1. above.
>>
>>And in passing, note that the overrun count will show something going
>
> on
>
>>when
>>the clock is moved forward and not when it is move backward.
>>
>>
>>>
>>>>And a minor issue, the community seems to prefer the C comment style
>>>
>>>to
>>>
>>>
>>>>the C++
>>>>style of comments...
>>>>
>>>
>>>
>>>Done.
>>>
>>>
>>>
>>>>Thanks for your effort in this matter.
>>>>
>>>>George
>>>>
>>>>
>>>>>diff -urN -X rt.ia32/base/dontdiff
>>>>>linux-2.6.6/include/linux/posix-timers.h
>>>>>linux-2.6.6.dev/include/linux/posix-timers.h
>>>>>--- linux-2.6.6/include/linux/posix-timers.h 2004-05-10
>>>>>10:32:29.000000000 +0800
>>>>>+++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-02
>>>>>10:30:57.000000000 +0800
>>>>>@@ -1,9 +1,14 @@
>>>>>#ifndef _linux_POSIX_TIMERS_H
>>>>>#define _linux_POSIX_TIMERS_H
>>>>>
>>>>>+#include <linux/list.h>
>>>>>+#include <linux/spinlock.h>
>>>>>+
>>>>>struct k_clock {
>>>>> int res; /* in nano seconds */
>>>>>- int (*clock_set) (struct timespec * tp);
>>>>>+ struct list_head abs_timer_list;
>>>>>+ spinlock_t abs_timer_lock;
>>>>>+ int (*clock_set) (struct timespec * tp);
>>>>> int (*clock_get) (struct timespec * tp);
>>>>> int (*nsleep) (int flags,
>>>>> struct timespec * new_setting,
>>>>>diff -urN -X rt.ia32/base/dontdiff
>
> linux-2.6.6/include/linux/timer.h
>
>>>>>linux-2.6.6.dev/include/linux/timer.h
>>>>>--- linux-2.6.6/include/linux/timer.h 2004-05-10
>>>
>>>10:32:54.000000000
>>>
>>>
>>>>>+0800
>>>>>+++ linux-2.6.6.dev/include/linux/timer.h 2004-06-02
>>>>>19:16:08.000000000 +0800
>>>>>@@ -9,6 +9,7 @@
>>>>>
>>>>>struct timer_list {
>>>>> struct list_head entry;
>>>>>+ struct list_head abs_timer_entry;
>>>>> unsigned long expires;
>>>>>
>>>>> spinlock_t lock;
>>>>>diff -urN -X rt.ia32/base/dontdiff
>
> linux-2.6.6/kernel/posix-timers.c
>
>>>>>linux-2.6.6.dev/kernel/posix-timers.c
>>>>>--- linux-2.6.6/kernel/posix-timers.c 2004-05-10
>>>
>>>10:32:37.000000000
>>>
>>>
>>>>>+0800
>>>>>+++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-02
>>>>>19:12:31.000000000 +0800
>>>>>@@ -7,6 +7,9 @@
>>>>> *
>>>>> * Copyright (C) 2002 2003 by
>
> MontaVista
>
>>>>>Software.
>>>>> *
>>>>>+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
>>>>>+ * Copyright (C) 2004 Boris Hu
>>>>>+ *
>>>>> * This program is free software; you can redistribute it and/or
>>>
>>>modify
>>>
>>>
>>>>> * it under the terms of the GNU General Public License as
>>>
>>>published by
>>>
>>>
>>>>> * the Free Software Foundation; either version 2 of the License,
>>>
>>>or
>>>
>>>
>>>>>(at
>>>>>@@ -200,7 +203,9 @@
>>>>> */
>>>>>static __init int init_posix_timers(void)
>>>>>{
>>>>>- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
>>>>>+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
>>>>>+ .abs_timer_lock = SPIN_LOCK_UNLOCKED
>>>>>
>>>>>+ };
>>>>> struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
>>>>> .clock_get = do_posix_clock_monotonic_gettime,
>>>>> .clock_set = do_posix_clock_monotonic_settime
>>>>>@@ -388,6 +393,7 @@
>>>>> return;
>>>>> }
>>>>> posix_clocks[clock_id] = *new_clock;
>>>>>+ INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
>>>>>}
>>>>>
>>>>>static struct k_itimer * alloc_posix_timer(void)
>>>>>@@ -843,8 +849,15 @@
>>>>> if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
>>>>>{
>>>>> if (timr->it_timer.expires == jiffies)
>>>>> timer_notify_task(timr);
>>>>>- else
>>>>>+ else {
>>>>> add_timer(&timr->it_timer);
>>>>>+ if (flags & TIMER_ABSTIME) {
>>>>>+ spin_lock(&clock->abs_timer_lock);
>>>>>+
>>>>>list_add_tail(&(timr->it_timer.abs_timer_entry),
>>>>>+
>>>>>&(clock->abs_timer_list));
>>>>>+
>>>
>>>spin_unlock(&clock->abs_timer_lock);
>>>
>>>
>>>>>+ }
>>>>>+ }
>>>>> }
>>>>> return 0;
>>>>>}
>>>>>@@ -1085,16 +1098,61 @@
>>>>>sys_clock_settime(clockid_t which_clock, const struct timespec
>>>
>>>__user
>>>
>>>
>>>>>*tp)
>>>>>{
>>>>> struct timespec new_tp;
>>>>>+ struct timespec before, now;
>>>>>+ struct k_clock *clock;
>>>>>+ long ret;
>>>>>+ struct timer_list *timer, *tmp;
>>>>>+ struct timespec delta;
>>>>>+ u64 exp = 0;
>>>>>
>>>>>- if ((unsigned) which_clock >= MAX_CLOCKS ||
>>>>>+ if ((unsigned) which_clock >= MAX_CLOCKS ||
>>>>> !posix_clocks[which_clock].res)
>>>>> return -EINVAL;
>>>>>- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
>>>>>+
>>>>>+ clock = &posix_clocks[which_clock];
>>>>>+
>>>>>+ if (copy_from_user(&new_tp, tp, sizeof (*tp)))
>>>>> return -EFAULT;
>>>>> if (posix_clocks[which_clock].clock_set)
>>>>> return posix_clocks[which_clock].clock_set(&new_tp);
>>>>>
>>>>>- return do_sys_settimeofday(&new_tp, NULL);
>>>>>+ do_posix_gettime(clock, &before);
>>>>>+
>>>>>+ ret = do_sys_settimeofday(&new_tp, NULL);
>>>>>+
>>>>>+ /*
>>>>>+ * Check if there exist TIMER_ABSTIME timers to update.
>>>>>+ */
>>>>>+ if (which_clock != CLOCK_REALTIME ||
>>>>>+ list_empty(&clock->abs_timer_list))
>>>>>+ return ret;
>>>>>+
>>>>>+ do_posix_gettime(clock, &now);
>>>>>+ delta.tv_sec = before.tv_sec - now.tv_sec;
>>>>>+ delta.tv_nsec = before.tv_nsec - now.tv_nsec;
>>>>>+ while (delta.tv_nsec >= NSEC_PER_SEC) {
>>>>>+ delta.tv_sec ++;
>>>>>+ delta.tv_nsec -= NSEC_PER_SEC;
>>>>>+ }
>>>>>+ while (delta.tv_nsec < 0) {
>>>>>+ delta.tv_sec --;
>>>>>+ delta.tv_nsec += NSEC_PER_SEC;
>>>>>+ }
>>>>>+
>>>>>+ tstojiffie(&delta, clock->res, &exp);
>>>>>+
>>>>>+ spin_lock(&(clock->abs_timer_lock));
>>>>>+ list_for_each_entry_safe(timer, tmp,
>>>>>+ &clock->abs_timer_list,
>>>>>+ abs_timer_entry) {
>>>>>+ if (timer && del_timer(timer)) { //the timer has
>>>
>>>not
>>>
>>>
>>>>>run
>>>>>+ timer->expires += exp;
>>>>>+ add_timer(timer);
>>>>>+ } else
>>>>>+ list_del(&timer->abs_timer_entry);
>>>>>+ }
>>>>>+ spin_unlock(&(clock->abs_timer_lock));
>>>>>+ return ret;
>>>>>}
>>>>>
>>>>>asmlinkage long
>>>>>
>>>>>Good Luck !
>>>>>Boris Hu (Hu Jiangtao)
>>>>>Intel China Software Center
>>>>>86-021-5257-4545#1277
>>>>>iNET: 8-752-1277
>>>>>************************************
>>>>>There are my thoughts, not my employer's.
>>>>>************************************
>>>>>"gpg --recv-keys --keyserver wwwkeys.pgp.net 0FD7685F"
>>>>>{0FD7685F:CFD6 6F5C A2CB 7881 725B CEA0 956F 9F14 0FD7 685F}
>>>>>
>>>>>
>>>>
>>>>--
>>>>George Anzinger [email protected]
>>>>High-res-timers: http://sourceforge.net/projects/high-res-timers/
>>>>Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml
>>>
>>>
>>>
>>>
>>--
>>George Anzinger [email protected]
>>High-res-timers: http://sourceforge.net/projects/high-res-timers/
>>Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml
>
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-07 05:51:51

by Hu, Boris

[permalink] [raw]
Subject: RE: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

One minor update according to your comments. Thanks.

diff -urN -X dontdiff linux-2.6.6/include/linux/posix-timers.h
linux-2.6.6.dev/include/linux/posix-timers.h
--- linux-2.6.6/include/linux/posix-timers.h 2004-06-04
15:48:33.000000000 +0800
+++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-04
10:40:10.000000000 +0800
@@ -1,9 +1,14 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H

+#include <linux/spinlock.h>
+#include <linux/list.h>
+
struct k_clock {
int res; /* in nano seconds */
- int (*clock_set) (struct timespec * tp);
+ struct list_head abs_timer_list;
+ spinlock_t abs_timer_lock;
+ int (*clock_set) (struct timespec * tp);
int (*clock_get) (struct timespec * tp);
int (*nsleep) (int flags,
struct timespec * new_setting,
diff -urN -X dontdiff linux-2.6.6/include/linux/sched.h
linux-2.6.6.dev/include/linux/sched.h
--- linux-2.6.6/include/linux/sched.h 2004-06-04 15:48:33.000000000
+0800
+++ linux-2.6.6.dev/include/linux/sched.h 2004-06-04
10:39:53.000000000 +0800
@@ -342,6 +342,8 @@
struct task_struct *it_process; /* process to send signal to */
struct timer_list it_timer;
struct sigqueue *sigq; /* signal queue entry. */
+ struct list_head abs_timer_entry; /* clock abs_timer_list */
+ struct timespec wall_to_monotonic_prev;
};


diff -urN -X dontdiff linux-2.6.6/kernel/posix-timers.c
linux-2.6.6.dev/kernel/posix-timers.c
--- linux-2.6.6/kernel/posix-timers.c 2004-06-04 15:48:33.000000000
+0800
+++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-07
12:57:28.000000000 +0800
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista
Software.
*
+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ * Copyright (C) 2004 Boris Hu
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
(at
@@ -95,7 +98,7 @@
# define set_timer_inactive(tmr) \
do { \
(tmr)->it_timer.entry.prev = (void
*)TIMER_INACTIVE; \
- } while (0)
+ } while (0)
#else
# define timer_active(tmr) BARFY // error to use outside of SMP
# define set_timer_inactive(tmr) do { } while (0)
@@ -200,7 +203,9 @@
*/
static __init int init_posix_timers(void)
{
- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+ .abs_timer_lock = SPIN_LOCK_UNLOCKED

+ };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
@@ -212,7 +217,6 @@
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0,
0, 0);
idr_init(&posix_timers_id);
-
return 0;
}

@@ -360,6 +364,11 @@
set_timer_inactive(timr);
timer_notify_task(timr);
unlock_timer(timr, flags);
+ if (CLOCK_REALTIME == timr->it_clock) {
+
spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ list_del_init(&timr->abs_timer_entry);
+
spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ }
}


@@ -388,6 +397,7 @@
return;
}
posix_clocks[clock_id] = *new_clock;
+ INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
}

static struct k_itimer * alloc_posix_timer(void)
@@ -402,6 +412,7 @@
kmem_cache_free(posix_timers_cache, tmr);
tmr = 0;
}
+ INIT_LIST_HEAD(&tmr->abs_timer_entry);
return tmr;
}

@@ -787,6 +798,7 @@
{
struct k_clock *clock = &posix_clocks[timr->it_clock];
u64 expire_64;
+ unsigned long seq;

if (old_setting)
do_timer_gettime(timr, old_setting);
@@ -813,6 +825,11 @@
#else
del_timer(&timr->it_timer);
#endif
+ if (CLOCK_REALTIME == timr->it_clock) {
+ spin_lock(&clock->abs_timer_lock);
+ list_del_init(&timr->abs_timer_entry);
+ spin_unlock(&clock->abs_timer_lock);
+ }
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -834,7 +851,6 @@
tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
timr->it_incr = (unsigned long)expire_64;

-
/*
* For some reason the timer does not fire immediately if
expires is
* equal to jiffies, so the timer notify function is called
directly.
@@ -843,8 +859,21 @@
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
{
if (timr->it_timer.expires == jiffies)
timer_notify_task(timr);
- else
+ else {
add_timer(&timr->it_timer);
+ if (flags & TIMER_ABSTIME &&
+ timr->it_clock == CLOCK_REALTIME) {
+ spin_lock(&clock->abs_timer_lock);
+ do {
+ seq =
read_seqbegin(&xtime_lock);
+ timr->wall_to_monotonic_prev =
+ wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock,
seq));
+ list_add_tail(&(timr->abs_timer_entry),
+
&(clock->abs_timer_list));
+ spin_unlock(&clock->abs_timer_lock);
+ }
+ }
}
return 0;
}
@@ -875,10 +904,10 @@
if (!timr)
return -EINVAL;

- if (!posix_clocks[timr->it_clock].timer_set)
+ if (!posix_clocks[timr->it_clock].timer_set)
error = do_timer_settime(timr, flags, &new_spec, rtn);
else
- error = posix_clocks[timr->it_clock].timer_set(timr,
+ error = posix_clocks[timr->it_clock].timer_set(timr,
flags,

&new_spec, rtn);
unlock_timer(timr, flag);
@@ -911,6 +940,11 @@
#else
del_timer(&timer->it_timer);
#endif
+ if (CLOCK_REALTIME == timer->it_clock) {
+
spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ list_del_init(&timer->abs_timer_entry);
+
spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
+ }
return 0;
}

@@ -1086,10 +1120,10 @@
{
struct timespec new_tp;

- if ((unsigned) which_clock >= MAX_CLOCKS ||
+ if ((unsigned) which_clock >= MAX_CLOCKS ||
!posix_clocks[which_clock].res)
return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+ if (copy_from_user(&new_tp, tp, sizeof (*tp)))
return -EFAULT;
if (posix_clocks[which_clock].clock_set)
return posix_clocks[which_clock].clock_set(&new_tp);
@@ -1159,7 +1193,55 @@

void clock_was_set(void)
{
+ struct k_clock *clock = &posix_clocks[CLOCK_REALTIME];
+ struct k_itimer *timr, *tmp;
+ struct timespec delta;
+ u64 exp = 0;
+ unsigned long seq;
+
wake_up_all(&nanosleep_abs_wqueue);
+
+ /*
+ * Check if there exist TIMER_ABSTIME timers to correct.
+ */
+ if (list_empty(&clock->abs_timer_list))
+ return;
+
+ spin_lock(&clock->abs_timer_lock);
+ list_for_each_entry_safe(timr, tmp,
+ &clock->abs_timer_list,
+ abs_timer_entry) {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ delta.tv_sec =
+ wall_to_monotonic.tv_sec
+ - timr->wall_to_monotonic_prev.tv_sec;
+ delta.tv_nsec =
+ wall_to_monotonic.tv_nsec
+ - timr->wall_to_monotonic_prev.tv_nsec;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ while (delta.tv_nsec >= NSEC_PER_SEC) {
+ delta.tv_sec ++;
+ delta.tv_nsec -= NSEC_PER_SEC;
+ }
+ while (delta.tv_nsec < 0) {
+ delta.tv_sec --;
+ delta.tv_nsec += NSEC_PER_SEC;
+ }
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ timr->wall_to_monotonic_prev =
wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ tstojiffie(&delta, clock->res, &exp);
+ if (del_timer(&timr->it_timer)) { /* the timer has not
run */
+ timr->it_timer.expires += exp;
+ add_timer(&timr->it_timer);
+ } else
+ list_del_init(&timr->abs_timer_entry);
+ }
+ spin_unlock(&clock->abs_timer_lock);
}

long clock_nanosleep_restart(struct restart_block *restart_block);

Boris Hu (Hu Jiangtao)
*****************************************
There are my thoughts, not my employer's.
*****************************************

> > Here is one update version. wall_to_monotonic copy has been moved to
> > k_itimer. Thanks.
>
> As far as it goes... the update of the k_timer wall_to_monotonic
should
> be the
> same as that which is used to do the correction. I.e. it should be
done
> within
> the same lock region.
>
> I will be out of town till Tuesday or Wed. next week....
>
> George
> >


Attachments:
posix-abs_timer-bugfix-0.3.diff (8.72 kB)
posix-abs_timer-bugfix-0.3.diff

2004-06-10 01:07:29

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Hu, Boris wrote:
> One minor update according to your comments. Thanks.

I think the locking is still not right. We must address these issues. Either
with code or a good argument as to why they are not issues.

1.) Getting the timer's value pegged to a given clock set value (done by copying
wall_to_monotonic to the k_timer struct)., This must be done under the xtime
read lock at the same time as the clock is read to set up the timer (i.e. the
value in k_timer MUST match the value used to set up the timer).

2.) Covering the race between the timer adjustment code and possible POSIX timer
deletion (done by NOT assuming the timer is there just because we found it in
the abs timer list, although we do pin it down long enough to get it's ID by
locking this list). This also requires us to take the timer lock which means we
have to drop the abs list lock.

3.) Covering the race between the timer expiring during the system clock setting
processing. Done by having the timer call back code verify that the same value
of wall_to_monotonic is still in play. Do note also that the timer could expire
prio to its being put in the abs list.

George


>
> diff -urN -X dontdiff linux-2.6.6/include/linux/posix-timers.h
> linux-2.6.6.dev/include/linux/posix-timers.h
> --- linux-2.6.6/include/linux/posix-timers.h 2004-06-04
> 15:48:33.000000000 +0800
> +++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-04
> 10:40:10.000000000 +0800
> @@ -1,9 +1,14 @@
> #ifndef _linux_POSIX_TIMERS_H
> #define _linux_POSIX_TIMERS_H
>
> +#include <linux/spinlock.h>
> +#include <linux/list.h>
> +
> struct k_clock {
> int res; /* in nano seconds */
> - int (*clock_set) (struct timespec * tp);
> + struct list_head abs_timer_list;
> + spinlock_t abs_timer_lock;
> + int (*clock_set) (struct timespec * tp);
> int (*clock_get) (struct timespec * tp);
> int (*nsleep) (int flags,
> struct timespec * new_setting,
> diff -urN -X dontdiff linux-2.6.6/include/linux/sched.h
> linux-2.6.6.dev/include/linux/sched.h
> --- linux-2.6.6/include/linux/sched.h 2004-06-04 15:48:33.000000000
> +0800
> +++ linux-2.6.6.dev/include/linux/sched.h 2004-06-04
> 10:39:53.000000000 +0800
> @@ -342,6 +342,8 @@
> struct task_struct *it_process; /* process to send signal to */
> struct timer_list it_timer;
> struct sigqueue *sigq; /* signal queue entry. */
> + struct list_head abs_timer_entry; /* clock abs_timer_list */
> + struct timespec wall_to_monotonic_prev;
> };
>
>
> diff -urN -X dontdiff linux-2.6.6/kernel/posix-timers.c
> linux-2.6.6.dev/kernel/posix-timers.c
> --- linux-2.6.6/kernel/posix-timers.c 2004-06-04 15:48:33.000000000
> +0800
> +++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-07
> 12:57:28.000000000 +0800
> @@ -7,6 +7,9 @@
> *
> * Copyright (C) 2002 2003 by MontaVista
> Software.
> *
> + * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
> + * Copyright (C) 2004 Boris Hu
> + *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> * the Free Software Foundation; either version 2 of the License, or
> (at
> @@ -95,7 +98,7 @@
> # define set_timer_inactive(tmr) \
> do { \
> (tmr)->it_timer.entry.prev = (void
> *)TIMER_INACTIVE; \
> - } while (0)
> + } while (0)
> #else
> # define timer_active(tmr) BARFY // error to use outside of SMP
> # define set_timer_inactive(tmr) do { } while (0)
> @@ -200,7 +203,9 @@
> */
> static __init int init_posix_timers(void)
> {
> - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
> + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
> + .abs_timer_lock = SPIN_LOCK_UNLOCKED
>
> + };
> struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
> .clock_get = do_posix_clock_monotonic_gettime,
> .clock_set = do_posix_clock_monotonic_settime
> @@ -212,7 +217,6 @@
> posix_timers_cache = kmem_cache_create("posix_timers_cache",
> sizeof (struct k_itimer), 0, 0,
> 0, 0);
> idr_init(&posix_timers_id);
> -
> return 0;
> }
>
> @@ -360,6 +364,11 @@
> set_timer_inactive(timr);
> timer_notify_task(timr);
> unlock_timer(timr, flags);
> + if (CLOCK_REALTIME == timr->it_clock) {
> +
> spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + list_del_init(&timr->abs_timer_entry);
> +
> spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + }
> }
>
>
> @@ -388,6 +397,7 @@
> return;
> }
> posix_clocks[clock_id] = *new_clock;
> + INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
> }
>
> static struct k_itimer * alloc_posix_timer(void)
> @@ -402,6 +412,7 @@
> kmem_cache_free(posix_timers_cache, tmr);
> tmr = 0;
> }
> + INIT_LIST_HEAD(&tmr->abs_timer_entry);
> return tmr;
> }
>
> @@ -787,6 +798,7 @@
> {
> struct k_clock *clock = &posix_clocks[timr->it_clock];
> u64 expire_64;
> + unsigned long seq;
>
> if (old_setting)
> do_timer_gettime(timr, old_setting);
> @@ -813,6 +825,11 @@
> #else
> del_timer(&timr->it_timer);
> #endif
> + if (CLOCK_REALTIME == timr->it_clock) {
> + spin_lock(&clock->abs_timer_lock);
> + list_del_init(&timr->abs_timer_entry);
> + spin_unlock(&clock->abs_timer_lock);
> + }
> timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
> ~REQUEUE_PENDING;
> timr->it_overrun_last = 0;
> @@ -834,7 +851,6 @@
> tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
> timr->it_incr = (unsigned long)expire_64;
>
> -
> /*
> * For some reason the timer does not fire immediately if
> expires is
> * equal to jiffies, so the timer notify function is called
> directly.
> @@ -843,8 +859,21 @@
> if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
> {
> if (timr->it_timer.expires == jiffies)
> timer_notify_task(timr);
> - else
> + else {
> add_timer(&timr->it_timer);
> + if (flags & TIMER_ABSTIME &&
> + timr->it_clock == CLOCK_REALTIME) {
> + spin_lock(&clock->abs_timer_lock);
> + do {
> + seq =
> read_seqbegin(&xtime_lock);
> + timr->wall_to_monotonic_prev =
> + wall_to_monotonic;
> + } while (read_seqretry(&xtime_lock,
> seq));
> + list_add_tail(&(timr->abs_timer_entry),
> +
> &(clock->abs_timer_list));
> + spin_unlock(&clock->abs_timer_lock);
> + }
> + }
> }
> return 0;
> }
> @@ -875,10 +904,10 @@
> if (!timr)
> return -EINVAL;
>
> - if (!posix_clocks[timr->it_clock].timer_set)
> + if (!posix_clocks[timr->it_clock].timer_set)
> error = do_timer_settime(timr, flags, &new_spec, rtn);
> else
> - error = posix_clocks[timr->it_clock].timer_set(timr,
> + error = posix_clocks[timr->it_clock].timer_set(timr,
> flags,
>
> &new_spec, rtn);
> unlock_timer(timr, flag);
> @@ -911,6 +940,11 @@
> #else
> del_timer(&timer->it_timer);
> #endif
> + if (CLOCK_REALTIME == timer->it_clock) {
> +
> spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + list_del_init(&timer->abs_timer_entry);
> +
> spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> + }
> return 0;
> }
>
> @@ -1086,10 +1120,10 @@
> {
> struct timespec new_tp;
>
> - if ((unsigned) which_clock >= MAX_CLOCKS ||
> + if ((unsigned) which_clock >= MAX_CLOCKS ||
> !posix_clocks[which_clock].res)
> return -EINVAL;
> - if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> + if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> return -EFAULT;
> if (posix_clocks[which_clock].clock_set)
> return posix_clocks[which_clock].clock_set(&new_tp);
> @@ -1159,7 +1193,55 @@
>
> void clock_was_set(void)
> {
> + struct k_clock *clock = &posix_clocks[CLOCK_REALTIME];
> + struct k_itimer *timr, *tmp;
> + struct timespec delta;
> + u64 exp = 0;
> + unsigned long seq;
> +
> wake_up_all(&nanosleep_abs_wqueue);
> +
> + /*
> + * Check if there exist TIMER_ABSTIME timers to correct.
> + */
> + if (list_empty(&clock->abs_timer_list))
> + return;
> +
> + spin_lock(&clock->abs_timer_lock);
> + list_for_each_entry_safe(timr, tmp,
> + &clock->abs_timer_list,
> + abs_timer_entry) {
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + delta.tv_sec =
> + wall_to_monotonic.tv_sec
> + - timr->wall_to_monotonic_prev.tv_sec;
> + delta.tv_nsec =
> + wall_to_monotonic.tv_nsec
> + - timr->wall_to_monotonic_prev.tv_nsec;
> + } while (read_seqretry(&xtime_lock, seq));
> +
> + while (delta.tv_nsec >= NSEC_PER_SEC) {
> + delta.tv_sec ++;
> + delta.tv_nsec -= NSEC_PER_SEC;
> + }
> + while (delta.tv_nsec < 0) {
> + delta.tv_sec --;
> + delta.tv_nsec += NSEC_PER_SEC;
> + }
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + timr->wall_to_monotonic_prev =
> wall_to_monotonic;
> + } while (read_seqretry(&xtime_lock, seq));
> +
> + tstojiffie(&delta, clock->res, &exp);
> + if (del_timer(&timr->it_timer)) { /* the timer has not
> run */
> + timr->it_timer.expires += exp;
> + add_timer(&timr->it_timer);
> + } else
> + list_del_init(&timr->abs_timer_entry);
> + }
> + spin_unlock(&clock->abs_timer_lock);
> }
>
> long clock_nanosleep_restart(struct restart_block *restart_block);
>
> Boris Hu (Hu Jiangtao)
> *****************************************
> There are my thoughts, not my employer's.
> *****************************************
>
>
>>>Here is one update version. wall_to_monotonic copy has been moved to
>>>k_itimer. Thanks.
>>
>>As far as it goes... the update of the k_timer wall_to_monotonic
>
> should
>
>>be the
>>same as that which is used to do the correction. I.e. it should be
>
> done
>
>>within
>>the same lock region.
>>
>>I will be out of town till Tuesday or Wed. next week....
>>
>>George
>>>
>
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-10 09:34:54

by Hu, Boris

[permalink] [raw]
Subject: RE: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

I must miss sth in locks. Let me try to summarize all possible related
locks here, please correct me if I miss sth or mistake it. :)

There are three locks in our situation.

Locks Resource
clock->abs_timer_lock abs_timer_list

xtime_lock wall_to_monotonic

timer->it_lock timer


Scenarios
1. Add the absolute timespec timer to the abs_timer_list
1.1 copy wall_to_monotonic to timer's local
wall_to_monotonic_copy
xtime_lock readlock
1.2 add the timer to abs_timer_list
abs_timer_lock spin_lock
timer->it_lock spin_lock_irqsave???

2. Update the expire value of the absolute timespec timers in
abs_timer_list when the realtime clock is changed.
xtime_lock readlock
abs_timer_lock spin_lock
timer->it_lock spin_lock_irqsave???
3. Delete the timer from abs_timer_list when it is expired or deleted by
the user.
abs_timer_lock spin_lock
timer->it_lock spin_lock_irqsave???

Thanks.


Boris Hu (Hu Jiangtao)
*****************************************
There are my thoughts, not my employer's.
*****************************************

>
> Hu, Boris wrote:
> > One minor update according to your comments. Thanks.
>
> I think the locking is still not right. We must address these issues.
> Either
> with code or a good argument as to why they are not issues.
>
> 1.) Getting the timer's value pegged to a given clock set value (done
by
> copying
> wall_to_monotonic to the k_timer struct)., This must be done under
the
> xtime
> read lock at the same time as the clock is read to set up the timer
(i.e.
> the
> value in k_timer MUST match the value used to set up the timer).
>
> 2.) Covering the race between the timer adjustment code and possible
POSIX
> timer
> deletion (done by NOT assuming the timer is there just because we
found it
> in
> the abs timer list, although we do pin it down long enough to get it's
ID
> by
> locking this list). This also requires us to take the timer lock
which
> means we
> have to drop the abs list lock.
>
> 3.) Covering the race between the timer expiring during the system
clock
> setting
> processing. Done by having the timer call back code verify that the
same
> value
> of wall_to_monotonic is still in play. Do note also that the timer
could
> expire
> prio to its being put in the abs list.
>
> George
>
>
> >
> > diff -urN -X dontdiff linux-2.6.6/include/linux/posix-timers.h
> > linux-2.6.6.dev/include/linux/posix-timers.h
> > --- linux-2.6.6/include/linux/posix-timers.h 2004-06-04
> > 15:48:33.000000000 +0800
> > +++ linux-2.6.6.dev/include/linux/posix-timers.h 2004-06-04
> > 10:40:10.000000000 +0800
> > @@ -1,9 +1,14 @@
> > #ifndef _linux_POSIX_TIMERS_H
> > #define _linux_POSIX_TIMERS_H
> >
> > +#include <linux/spinlock.h>
> > +#include <linux/list.h>
> > +
> > struct k_clock {
> > int res; /* in nano seconds */
> > - int (*clock_set) (struct timespec * tp);
> > + struct list_head abs_timer_list;
> > + spinlock_t abs_timer_lock;
> > + int (*clock_set) (struct timespec * tp);
> > int (*clock_get) (struct timespec * tp);
> > int (*nsleep) (int flags,
> > struct timespec * new_setting,
> > diff -urN -X dontdiff linux-2.6.6/include/linux/sched.h
> > linux-2.6.6.dev/include/linux/sched.h
> > --- linux-2.6.6/include/linux/sched.h 2004-06-04
15:48:33.000000000
> > +0800
> > +++ linux-2.6.6.dev/include/linux/sched.h 2004-06-04
> > 10:39:53.000000000 +0800
> > @@ -342,6 +342,8 @@
> > struct task_struct *it_process; /* process to send signal to */
> > struct timer_list it_timer;
> > struct sigqueue *sigq; /* signal queue entry. */
> > + struct list_head abs_timer_entry; /* clock abs_timer_list
*/
> > + struct timespec wall_to_monotonic_prev;
> > };
> >
> >
> > diff -urN -X dontdiff linux-2.6.6/kernel/posix-timers.c
> > linux-2.6.6.dev/kernel/posix-timers.c
> > --- linux-2.6.6/kernel/posix-timers.c 2004-06-04
15:48:33.000000000
> > +0800
> > +++ linux-2.6.6.dev/kernel/posix-timers.c 2004-06-07
> > 12:57:28.000000000 +0800
> > @@ -7,6 +7,9 @@
> > *
> > * Copyright (C) 2002 2003 by MontaVista
> > Software.
> > *
> > + * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
> > + * Copyright (C) 2004 Boris Hu
> > + *
> > * This program is free software; you can redistribute it and/or
modify
> > * it under the terms of the GNU General Public License as
published by
> > * the Free Software Foundation; either version 2 of the License,
or
> > (at
> > @@ -95,7 +98,7 @@
> > # define set_timer_inactive(tmr) \
> > do { \
> > (tmr)->it_timer.entry.prev = (void
> > *)TIMER_INACTIVE; \
> > - } while (0)
> > + } while (0)
> > #else
> > # define timer_active(tmr) BARFY // error to use outside of SMP
> > # define set_timer_inactive(tmr) do { } while (0)
> > @@ -200,7 +203,9 @@
> > */
> > static __init int init_posix_timers(void)
> > {
> > - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
> > + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
> > + .abs_timer_lock = SPIN_LOCK_UNLOCKED
> >
> > + };
> > struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
> > .clock_get = do_posix_clock_monotonic_gettime,
> > .clock_set = do_posix_clock_monotonic_settime
> > @@ -212,7 +217,6 @@
> > posix_timers_cache = kmem_cache_create("posix_timers_cache",
> > sizeof (struct k_itimer), 0, 0,
> > 0, 0);
> > idr_init(&posix_timers_id);
> > -
> > return 0;
> > }
> >
> > @@ -360,6 +364,11 @@
> > set_timer_inactive(timr);
> > timer_notify_task(timr);
> > unlock_timer(timr, flags);
> > + if (CLOCK_REALTIME == timr->it_clock) {
> > +
> > spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> > + list_del_init(&timr->abs_timer_entry);
> > +
> > spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> > + }
> > }
> >
> >
> > @@ -388,6 +397,7 @@
> > return;
> > }
> > posix_clocks[clock_id] = *new_clock;
> > + INIT_LIST_HEAD(&posix_clocks[clock_id].abs_timer_list);
> > }
> >
> > static struct k_itimer * alloc_posix_timer(void)
> > @@ -402,6 +412,7 @@
> > kmem_cache_free(posix_timers_cache, tmr);
> > tmr = 0;
> > }
> > + INIT_LIST_HEAD(&tmr->abs_timer_entry);
> > return tmr;
> > }
> >
> > @@ -787,6 +798,7 @@
> > {
> > struct k_clock *clock = &posix_clocks[timr->it_clock];
> > u64 expire_64;
> > + unsigned long seq;
> >
> > if (old_setting)
> > do_timer_gettime(timr, old_setting);
> > @@ -813,6 +825,11 @@
> > #else
> > del_timer(&timr->it_timer);
> > #endif
> > + if (CLOCK_REALTIME == timr->it_clock) {
> > + spin_lock(&clock->abs_timer_lock);
> > + list_del_init(&timr->abs_timer_entry);
> > + spin_unlock(&clock->abs_timer_lock);
> > + }
> > timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
> > ~REQUEUE_PENDING;
> > timr->it_overrun_last = 0;
> > @@ -834,7 +851,6 @@
> > tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
> > timr->it_incr = (unsigned long)expire_64;
> >
> > -
> > /*
> > * For some reason the timer does not fire immediately if
> > expires is
> > * equal to jiffies, so the timer notify function is called
> > directly.
> > @@ -843,8 +859,21 @@
> > if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
> > {
> > if (timr->it_timer.expires == jiffies)
> > timer_notify_task(timr);
> > - else
> > + else {
> > add_timer(&timr->it_timer);
> > + if (flags & TIMER_ABSTIME &&
> > + timr->it_clock == CLOCK_REALTIME) {
> > + spin_lock(&clock->abs_timer_lock);
> > + do {
> > + seq =
> > read_seqbegin(&xtime_lock);
> > +
timr->wall_to_monotonic_prev =
> > + wall_to_monotonic;
> > + } while (read_seqretry(&xtime_lock,
> > seq));
> > +
list_add_tail(&(timr->abs_timer_entry),
> > +
> > &(clock->abs_timer_list));
> > +
spin_unlock(&clock->abs_timer_lock);
> > + }
> > + }
> > }
> > return 0;
> > }
> > @@ -875,10 +904,10 @@
> > if (!timr)
> > return -EINVAL;
> >
> > - if (!posix_clocks[timr->it_clock].timer_set)
> > + if (!posix_clocks[timr->it_clock].timer_set)
> > error = do_timer_settime(timr, flags, &new_spec, rtn);
> > else
> > - error = posix_clocks[timr->it_clock].timer_set(timr,
> > + error =
posix_clocks[timr->it_clock].timer_set(timr,
> > flags,
> >
> > &new_spec, rtn);
> > unlock_timer(timr, flag);
> > @@ -911,6 +940,11 @@
> > #else
> > del_timer(&timer->it_timer);
> > #endif
> > + if (CLOCK_REALTIME == timer->it_clock) {
> > +
> > spin_lock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> > + list_del_init(&timer->abs_timer_entry);
> > +
> > spin_unlock(&posix_clocks[CLOCK_REALTIME].abs_timer_lock);
> > + }
> > return 0;
> > }
> >
> > @@ -1086,10 +1120,10 @@
> > {
> > struct timespec new_tp;
> >
> > - if ((unsigned) which_clock >= MAX_CLOCKS ||
> > + if ((unsigned) which_clock >= MAX_CLOCKS ||
> > !posix_clocks[which_clock].res)
> > return -EINVAL;
> > - if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> > + if (copy_from_user(&new_tp, tp, sizeof (*tp)))
> > return -EFAULT;
> > if (posix_clocks[which_clock].clock_set)
> > return posix_clocks[which_clock].clock_set(&new_tp);
> > @@ -1159,7 +1193,55 @@
> >
> > void clock_was_set(void)
> > {
> > + struct k_clock *clock = &posix_clocks[CLOCK_REALTIME];
> > + struct k_itimer *timr, *tmp;
> > + struct timespec delta;
> > + u64 exp = 0;
> > + unsigned long seq;
> > +
> > wake_up_all(&nanosleep_abs_wqueue);
> > +
> > + /*
> > + * Check if there exist TIMER_ABSTIME timers to correct.
> > + */
> > + if (list_empty(&clock->abs_timer_list))
> > + return;
> > +
> > + spin_lock(&clock->abs_timer_lock);
> > + list_for_each_entry_safe(timr, tmp,
> > + &clock->abs_timer_list,
> > + abs_timer_entry) {
> > + do {
> > + seq = read_seqbegin(&xtime_lock);
> > + delta.tv_sec =
> > + wall_to_monotonic.tv_sec
> > + -
timr->wall_to_monotonic_prev.tv_sec;
> > + delta.tv_nsec =
> > + wall_to_monotonic.tv_nsec
> > + -
timr->wall_to_monotonic_prev.tv_nsec;
> > + } while (read_seqretry(&xtime_lock, seq));
> > +
> > + while (delta.tv_nsec >= NSEC_PER_SEC) {
> > + delta.tv_sec ++;
> > + delta.tv_nsec -= NSEC_PER_SEC;
> > + }
> > + while (delta.tv_nsec < 0) {
> > + delta.tv_sec --;
> > + delta.tv_nsec += NSEC_PER_SEC;
> > + }
> > + do {
> > + seq = read_seqbegin(&xtime_lock);
> > + timr->wall_to_monotonic_prev =
> > wall_to_monotonic;
> > + } while (read_seqretry(&xtime_lock, seq));
> > +
> > + tstojiffie(&delta, clock->res, &exp);
> > + if (del_timer(&timr->it_timer)) { /* the timer has
not
> > run */
> > + timr->it_timer.expires += exp;
> > + add_timer(&timr->it_timer);
> > + } else
> > + list_del_init(&timr->abs_timer_entry);
> > + }
> > + spin_unlock(&clock->abs_timer_lock);
> > }
> >
> > long clock_nanosleep_restart(struct restart_block *restart_block);
> >
> > Boris Hu (Hu Jiangtao)
> > *****************************************
> > There are my thoughts, not my employer's.
> > *****************************************
> >
> >
> >>>Here is one update version. wall_to_monotonic copy has been moved
to
> >>>k_itimer. Thanks.
> >>
> >>As far as it goes... the update of the k_timer wall_to_monotonic
> >
> > should
> >
> >>be the
> >>same as that which is used to do the correction. I.e. it should be
> >
> > done
> >
> >>within
> >>the same lock region.
> >>
> >>I will be out of town till Tuesday or Wed. next week....
> >>
> >>George
> >>>
> >
> >
> >
>
> --
> George Anzinger [email protected]
> High-res-timers: http://sourceforge.net/projects/high-res-timers/
> Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml
>


2004-06-11 22:00:38

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] One possible bugfix for CLOCK_REALTIME timer.

Hu, Boris wrote:
> I must miss sth in locks. Let me try to summarize all possible related
> locks here, please correct me if I miss sth or mistake it. :)
>
> There are three locks in our situation.
>
> Locks Resource
> clock->abs_timer_lock abs_timer_list
>
> xtime_lock wall_to_monotonic
>
> timer->it_lock timer
>
Please note that this is a learning exercise for me too. There are, of course,
several ways to handle these issues. We are trying for a relatively fast way
through the maze, at least as far as cpu time is concerned.

Lets call them abs, xtime, and timer for short.

A word about the xtime lock. The reason we take it is to get a coherent time.
To do this we are required to read all the related time values at once. We
don't need to dispose of them (i.e. place them in our structure, compute or
convert, etc.) while holding the lock, just grab all the bits.

The timer lock is, for this discussion, to insure continued existence of the
timer. Sooner or later the timer will be released back to the free memory pool...

And, for completeness, the abs lock protects the abs list structure. If held
for the entire clock_was_set sequence it will also serialize this.

>
> Scenarios
> 1. Add the absolute timespec timer to the abs_timer_list
> 1.1 copy wall_to_monotonic to timer's local
> wall_to_monotonic_copy
> xtime_lock readlock
(for coherence we need to get this at the same time we get the
current time which we do in the process of calculating the expire time)
XX> 1.2 add the timer to abs_timer_list
XX> abs_timer_lock spin_lock
XX> timer->it_lock spin_lock_irqsave???
To be more correct here, lets change the order and write it this way to show
that one is held while the other is taken (lets also include the add_timer):
1.2 add the timer to abs_timer_list
timer->it_lock
add_timer (takes the timer list lock)
abs_timer_lock
>
XX> 2. Update the expire value of the absolute timespec timers in
XX> abs_timer_list when the realtime clock is changed.
XX> xtime_lock readlock
XX> abs_timer_lock spin_lock
XX> timer->it_lock spin_lock_irqsave???
This code must take the locks in the same order to avoid dead locks. This is a
pain as we find the timer under the abs lock and then must drop the abs lock to
take the timer lock. We would like to look at, possibly doing (where we depend
on the abs list lock to keep the timer allocated):
2. clock change
abs_timer_lock (also serializes clock_was_set)
xtime_lock (see note below)
delete from timer list (takes the timer list lock)
add_timer

Note, that we are taking the xtime lock. It is needed here only because
wall_to... is two words and, again, we want coherence of these.

Also, this code, on finding that the timer is not in the timer_list (delete
fails) need do nothing. It is ok to leave it in the abs list, as it will be
removed in due course. (We assume this can only happen if we have interrupted
either 3 or 4 below.)

XX> 3. Delete the timer from abs_timer_list when it is expired or deleted by
XX> the user.
XX> abs_timer_lock spin_lock
XX> timer->it_lock spin_lock_irqsave???
Lets separate these two.
3. timer expires
<remove from the timer list> (timer list lock)
timer->it_lock
abs_timer_lock
And then we restart it...
3.1 timer->it_lock
xtime_lock (get NOW and wall_to...)
(update expire time)
add_timer
abs_timer_lock
4. Delete timer (or stop it)
MUST take in the same order as above...
timer->it_lock
<removed from the timer list>
abs_timer_lock

So, where are the "holes"?

a.) if the clock is set between 1.1 and 1.2, i.e. we have pegged the wall_to..
to the timer but do not yet have it in the abs list. This same "hole" is in
3.1. I think the easy way to fix this is to recheck wall_to... WHILE HOLDING
the abs list lock. If it fails, update as in 2. This does not need to be a
while loop as long as the abs lock is held as any changes to wall_to... beyond
this point will be serialized with clock_was_sets abs lock. Something like:
abs_timer_lock
verify (delete/ add_timer if needed)
(add to the abs list)

b.) If a timer expires and we are "on the way" to clock_was_set, we could miss
the need to restart it with a new time. Once we get in clock_was_set the abs
lock will stall the expire code, but NOT do anything as the timer is no longer
in the list. Both of these problems can be solved by the expire code verifying
that the timers wall_to... is the same as the current value. If not, and the
change requires the timer to wait longer, restart the timer.
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-28 22:03:19

by George Anzinger

[permalink] [raw]
Subject: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.6.6-kbdw/include/linux/posix-timers.h linux/include/linux/posix-timers.h
--- linux-2.6.6-kbdw/include/linux/posix-timers.h 2003-11-10 17:09:04.000000000 -0800
+++ linux/include/linux/posix-timers.h 2004-06-18 19:07:59.000000000 -0700
@@ -1,8 +1,16 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H

+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+struct k_clock_abs {
+ struct list_head list;
+ spinlock_t lock;
+};
struct k_clock {
int res; /* in nano seconds */
+ struct k_clock_abs *abs_struct;
int (*clock_set) (struct timespec * tp);
int (*clock_get) (struct timespec * tp);
int (*nsleep) (int flags,
diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.6.6-kbdw/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.6-kbdw/include/linux/sched.h 2004-05-11 17:39:10.000000000 -0700
+++ linux/include/linux/sched.h 2004-06-14 17:40:35.000000000 -0700
@@ -342,6 +342,8 @@
struct task_struct *it_process; /* process to send signal to */
struct timer_list it_timer;
struct sigqueue *sigq; /* signal queue entry. */
+ struct list_head abs_timer_entry; /* clock abs_timer_list */
+ struct timespec wall_to_prev; /* wall_to_monotonic used when set */
};


diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.6.6-kbdw/kernel/posix-timers.c linux/kernel/posix-timers.c
--- linux-2.6.6-kbdw/kernel/posix-timers.c 2004-05-11 17:39:11.000000000 -0700
+++ linux/kernel/posix-timers.c 2004-06-25 15:04:47.000000000 -0700
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista Software.
*
+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ * Copyright (C) 2004 Boris Hu
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
@@ -41,6 +44,7 @@
#include <linux/idr.h>
#include <linux/posix-timers.h>
#include <linux/wait.h>
+#include <linux/workqueue.h>

#ifndef div_long_long_rem
#include <asm/div64.h>
@@ -169,6 +173,12 @@
*/

static struct k_clock posix_clocks[MAX_CLOCKS];
+/*
+ * We only have one real clock that can be set so we need only one abs list,
+ * even if we should want to have several clocks with differing resolutions.
+ */
+static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list),
+ .lock = SPIN_LOCK_UNLOCKED};

#define if_clock_do(clock_fun,alt_fun,parms) \
(!clock_fun) ? alt_fun parms : clock_fun parms
@@ -200,8 +210,11 @@
*/
static __init int init_posix_timers(void)
{
- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+ .abs_struct = &abs_list
+ };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
+ .abs_struct = NULL,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
};
@@ -212,7 +225,6 @@
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0, 0, 0);
idr_init(&posix_timers_id);
-
return 0;
}

@@ -239,19 +251,84 @@
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}

+/*
+ * This function adjusts the timer as needed as a result of the clock
+ * being set. It should only be called for absolute timers, and then
+ * under the abs_list lock. It computes the time difference and sets
+ * the new jiffies value in the timer. It also updates the timers
+ * reference wall_to_monotonic value. It is complicated by the fact
+ * that tstojiffies() only handles positive times and it needs to work
+ * with both positive and negative times. Also, for negative offsets,
+ * we need to defeat the res round up.
+ *
+ * Return is true if there is a new time, else false.
+ */
+static long add_clockset_delta(struct k_itimer *timr,
+ struct timespec *new_wall_to)
+{
+ struct timespec delta;
+ int sign = 0;
+ u64 exp;
+
+ set_normalized_timespec(&delta,
+ new_wall_to->tv_sec -
+ timr->wall_to_prev.tv_sec,
+ new_wall_to->tv_nsec -
+ timr->wall_to_prev.tv_nsec);
+ if (likely(!(delta.tv_sec | delta.tv_nsec)))
+ return 0;
+ if (delta.tv_sec < 0) {
+ set_normalized_timespec(&delta,
+ -delta.tv_sec,
+ 1 - delta.tv_nsec -
+ posix_clocks[timr->it_clock].res);
+ sign++;
+ }
+ tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp);
+ timr->wall_to_prev = *new_wall_to;
+ timr->it_timer.expires += (sign ? -exp : exp);
+ return 1;
+}
+
static void schedule_next_timer(struct k_itimer *timr)
{
+ struct timespec new_wall_to;
struct now_struct now;
+ unsigned long seq;

- /* Set up the timer for the next interval (if there is one) */
+ /*
+ * Set up the timer for the next interval (if there is one).
+ * Note: this code uses the abs_timer_lock to protect
+ * wall_to_prev and must hold it until exp is set, not exactly
+ * obvious...
+
+ * This function is used for CLOCK_REALTIME* and
+ * CLOCK_MONOTONIC* timers. If we ever want to handle other
+ * CLOCKs, the calling code (do_schedule_next_timer) would need
+ * to pull the "clock" info from the timer and dispatch the
+ * "other" CLOCKs "next timer" code (which, I suppose should
+ * also be added to the k_clock structure).
+ */
if (!timr->it_incr)
return;

- posix_get_now(&now);
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ posix_get_now(&now);
+ } while (read_seqretry(&xtime_lock, seq));
+
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ add_clockset_delta(timr, &new_wall_to);
+ }
+
do {
posix_bump_timer(timr);
}while (posix_time_before(&timr->it_timer, &now));

+ if (!list_empty(&timr->abs_timer_entry))
+ spin_unlock(&abs_list.lock);
timr->it_overrun_last = timr->it_overrun;
timr->it_overrun = -1;
++timr->it_requeue_pending;
@@ -312,7 +389,15 @@

memset(&timr->sigq->info, 0, sizeof(siginfo_t));

- /* Send signal to the process that owns this timer. */
+ /*
+ * Send signal to the process that owns this timer.
+
+ * This code assumes that all the possible abs_lists share the
+ * same lock (there is only one list at this time). If this is
+ * not the case, the CLOCK info would need to be used to find
+ * the proper abs list lock.
+ */
+
timr->sigq->info.si_signo = timr->it_sigev_signo;
timr->sigq->info.si_errno = 0;
timr->sigq->info.si_code = SI_TIMER;
@@ -320,6 +405,11 @@
timr->sigq->info.si_value = timr->it_sigev_value;
if (timr->it_incr)
timr->sigq->info.si_sys_private = ++timr->it_requeue_pending;
+ else if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ list_del_init(&timr->abs_timer_entry);
+ spin_unlock(&abs_list.lock);
+ }

if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
if (unlikely(timr->it_process->flags & PF_EXITING)) {
@@ -350,16 +440,51 @@
* This function gets called when a POSIX.1b interval timer expires. It
* is used as a callback from the kernel internal timer. The
* run_timer_list code ALWAYS calls with interrutps on.
+
+ * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
*/
static void posix_timer_fn(unsigned long __data)
{
struct k_itimer *timr = (struct k_itimer *) __data;
unsigned long flags;
+ unsigned long seq;
+ struct timespec delta, new_wall_to;
+ u64 exp = 0;
+ int do_notify = 1;

spin_lock_irqsave(&timr->it_lock, flags);
set_timer_inactive(timr);
- timer_notify_task(timr);
- unlock_timer(timr, flags);
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+ set_normalized_timespec(&delta,
+ new_wall_to.tv_sec -
+ timr->wall_to_prev.tv_sec,
+ new_wall_to.tv_nsec -
+ timr->wall_to_prev.tv_nsec);
+ if (likely((delta.tv_sec | delta.tv_nsec ) == 0)) {
+ /* do nothing, timer is on time */
+ } else if (delta.tv_sec < 0) {
+ /* do nothing, timer is already late */
+ } else {
+ /* timer is early due to a clock set */
+ tstojiffie(&delta,
+ posix_clocks[timr->it_clock].res,
+ &exp);
+ timr->wall_to_prev = new_wall_to;
+ timr->it_timer.expires += exp;
+ add_timer(&timr->it_timer);
+ do_notify = 0;
+ }
+ spin_unlock(&abs_list.lock);
+
+ }
+ if (do_notify)
+ timer_notify_task(timr);
+ unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */
}


@@ -398,6 +523,7 @@
return tmr;
memset(tmr, 0, sizeof (struct k_itimer));
tmr->it_id = (timer_t)-1;
+ INIT_LIST_HEAD(&tmr->abs_timer_entry);
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
tmr = 0;
@@ -703,11 +829,10 @@
* time to it to get the proper time for the timer.
*/
static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
- int abs, u64 *exp)
+ int abs, u64 *exp, struct timespec *wall_to)
{
struct timespec now;
struct timespec oc = *tp;
- struct timespec wall_to_mono;
u64 jiffies_64_f;
int rtn =0;

@@ -715,15 +840,15 @@
/*
* The mask pick up the 4 basic clocks
*/
- if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) {
+ if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) {
jiffies_64_f = do_posix_clock_monotonic_gettime_parts(
- &now, &wall_to_mono);
+ &now, wall_to);
/*
* If we are doing a MONOTONIC clock
*/
if((clock - &posix_clocks[0]) & CLOCKS_MONO){
- now.tv_sec += wall_to_mono.tv_sec;
- now.tv_nsec += wall_to_mono.tv_nsec;
+ now.tv_sec += wall_to->tv_sec;
+ now.tv_nsec += wall_to->tv_nsec;
}
} else {
/*
@@ -813,6 +938,11 @@
#else
del_timer(&timr->it_timer);
#endif
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ list_del_init(&timr->abs_timer_entry);
+ spin_unlock(&abs_list.lock);
+ }
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -827,24 +957,25 @@

if (adjust_abs_time(clock,
&new_setting->it_value, flags & TIMER_ABSTIME,
- &expire_64)) {
+ &expire_64, &(timr->wall_to_prev))) {
return -EINVAL;
}
timr->it_timer.expires = (unsigned long)expire_64;
tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
timr->it_incr = (unsigned long)expire_64;

-
/*
- * For some reason the timer does not fire immediately if expires is
- * equal to jiffies, so the timer notify function is called directly.
- * We do not even queue SIGEV_NONE timers!
+ * We do not even queue SIGEV_NONE timers! But we do put them
+ * in the abs list so we can do that right.
*/
- if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) {
- if (timr->it_timer.expires == jiffies)
- timer_notify_task(timr);
- else
- add_timer(&timr->it_timer);
+ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
+ add_timer(&timr->it_timer);
+
+ if (flags & TIMER_ABSTIME && clock->abs_struct) {
+ spin_lock(&clock->abs_struct->lock);
+ list_add_tail(&(timr->abs_timer_entry),
+ &(clock->abs_struct->list));
+ spin_unlock(&clock->abs_struct->lock);
}
return 0;
}
@@ -878,7 +1009,7 @@
if (!posix_clocks[timr->it_clock].timer_set)
error = do_timer_settime(timr, flags, &new_spec, rtn);
else
- error = posix_clocks[timr->it_clock].timer_set(timr,
+ error = posix_clocks[timr->it_clock].timer_set(timr,
flags,
&new_spec, rtn);
unlock_timer(timr, flag);
@@ -911,6 +1042,11 @@
#else
del_timer(&timer->it_timer);
#endif
+ if (!list_empty(&timer->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ list_del_init(&timer->abs_timer_entry);
+ spin_unlock(&abs_list.lock);
+ }
return 0;
}

@@ -1153,13 +1289,96 @@
* On locking, clock_was_set() is called from update_wall_clock which
* holds (or has held for it) a write_lock_irq( xtime_lock) and is
* called from the timer bh code. Thus we need the irq save locks.
+ *
+ * Also, on the call from update_wall_clock, that is done as part of a
+ * softirq thing. We don't want to delay the system that much (possibly
+ * long list of timers to fix), so we defer that work to keventd.
*/

static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
+static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL);
+
+static DECLARE_MUTEX(clock_was_set_lock);
+#define mutex_enter(x) down(x)
+#define mutex_enter_interruptable(x) down_interruptible(x)
+#define mutex_exit(x) up(x)

void clock_was_set(void)
{
+ struct k_itimer *timr;
+ struct timespec new_wall_to;
+ LIST_HEAD(cws_list);
+ unsigned long seq;
+
+
+ if (unlikely(in_interrupt())) {
+ schedule_work(&clock_was_set_work);
+ return;
+ }
wake_up_all(&nanosleep_abs_wqueue);
+
+ /*
+ * Check if there exist TIMER_ABSTIME timers to correct.
+ *
+ * Notes on locking: This code is run in task context with irq
+ * on. We CAN be interrupted! All other usage of the abs list
+ * lock is under the timer lock which holds the irq lock as
+ * well. We REALLY don't want to scan the whole list with the
+ * interrupt system off, AND we would like a sequence lock on
+ * this code as well. Since we assume that the clock will not
+ * be set often, it seems ok to take and release the irq lock
+ * for each timer. In fact add_timer will do this, so this is
+ * not an issue. So we know when we are done, we will move the
+ * whole list to a new location. Then as we process each entry,
+ * we will move it to the actual list again. This way, when our
+ * copy is empty, we are done. We are not all that concerned
+ * about preemption so we will use a semaphore lock to protect
+ * aginst reentry. This way we will not stall another
+ * processor. It is possible that this may delay some timers
+ * that should have expired, given the new clock, but even this
+ * will be minimal as we will always update to the current time,
+ * even if it was set by a task that is waiting for entry to
+ * this code. Timers that expire too early will be caught by
+ * the expire code and restarted.
+
+ * Absolute timers that repeat are left in the abs list while
+ * waiting for the task to pick up the signal. This means we
+ * may find timers that are not in the "add_timer" list, but are
+ * in the abs list. We do the same thing for these, save
+ * putting them back in the "add_timer" list. (Note, these are
+ * left in the abs list mainly to indicate that they are
+ * ABSOLUTE timers, a fact that is used by the re-arm code, and
+ * for which we have no other flag.)
+
+ */
+
+ mutex_enter(&clock_was_set_lock);
+ spin_lock_irq(&abs_list.lock);
+ list_splice_init(&abs_list.list, &cws_list);
+ spin_unlock_irq(&abs_list.lock);
+ do {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ spin_lock_irq(&abs_list.lock);
+ if (list_empty(&cws_list)) {
+ spin_unlock_irq(&abs_list.lock);
+ break;
+ }
+ timr = list_entry(cws_list.next, struct k_itimer,
+ abs_timer_entry);
+
+ list_del_init(&timr->abs_timer_entry);
+ if (add_clockset_delta(timr, &new_wall_to) &&
+ del_timer(&timr->it_timer)) /* timer run yet? */
+ add_timer(&timr->it_timer);
+ list_add(&timr->abs_timer_entry, &abs_list.list);
+ spin_unlock_irq(&abs_list.lock);
+ } while (1);
+
+ mutex_exit(&clock_was_set_lock);
}

long clock_nanosleep_restart(struct restart_block *restart_block);
@@ -1202,7 +1421,7 @@
long
do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
{
- struct timespec t;
+ struct timespec t, dum;
struct timer_list new_timer;
DECLARE_WAITQUEUE(abs_wqueue, current);
u64 rq_time = (u64)0;
@@ -1242,7 +1461,7 @@
t = *tsave;
if (abs || !rq_time) {
adjust_abs_time(&posix_clocks[which_clock], &t, abs,
- &rq_time);
+ &rq_time, &dum);
rq_time += (t.tv_sec || t.tv_nsec);
}

Binary files linux-2.6.6-kbdw/scripts/bin2c and linux/scripts/bin2c differ


Attachments:
hrtimer-abs-2.6.6-1.2.patch (15.78 kB)

2004-06-28 22:18:44

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

George Anzinger <[email protected]> wrote:
>
> Andrew,
>
> Boris and I have kicked this around enough. It think it is ready for prime time.
>

> static void schedule_next_timer(struct k_itimer *timr)
> {
> ...
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + new_wall_to = wall_to_monotonic;
> + posix_get_now(&now);
> + } while (read_seqretry(&xtime_lock, seq));
> +
> + if (!list_empty(&timr->abs_timer_entry)) {
> + spin_lock(&abs_list.lock);
> + add_clockset_delta(timr, &new_wall_to);
> + }
> +
> do {
> posix_bump_timer(timr);
> }while (posix_time_before(&timr->it_timer, &now));
>
> + if (!list_empty(&timr->abs_timer_entry))
> + spin_unlock(&abs_list.lock);

The locking in here is a bit ugly. Does the lock actually need to be held while
the timer is being bumped?

And what is the upper bound on that while loop?

> tmr->it_id = (timer_t)-1;
> + INIT_LIST_HEAD(&tmr->abs_timer_entry);
> if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {

The cat ate your tab key? ;)

> + if (!list_empty(&timr->abs_timer_entry)) {
> + spin_lock(&abs_list.lock);
> + list_del_init(&timr->abs_timer_entry);
> + spin_unlock(&abs_list.lock);
> + }

This is repeated often. Does it merit its own function?

> +static DECLARE_MUTEX(clock_was_set_lock);
> +#define mutex_enter(x) down(x)
> +#define mutex_enter_interruptable(x) down_interruptible(x)
> +#define mutex_exit(x) up(x)

Please open-code these operations.


2004-06-28 22:56:26

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

Andrew Morton wrote:
> George Anzinger <[email protected]> wrote:
>
>>Andrew,
>>
>>Boris and I have kicked this around enough. It think it is ready for prime time.
>>
>
>
>> static void schedule_next_timer(struct k_itimer *timr)
>> {
>>...
>>+ do {
>>+ seq = read_seqbegin(&xtime_lock);
>>+ new_wall_to = wall_to_monotonic;
>>+ posix_get_now(&now);
>>+ } while (read_seqretry(&xtime_lock, seq));
>>+
>>+ if (!list_empty(&timr->abs_timer_entry)) {
>>+ spin_lock(&abs_list.lock);
>>+ add_clockset_delta(timr, &new_wall_to);
>>+ }
>>+
>> do {
>> posix_bump_timer(timr);
>> }while (posix_time_before(&timr->it_timer, &now));
>>
>>+ if (!list_empty(&timr->abs_timer_entry))
>>+ spin_unlock(&abs_list.lock);
>
>
> The locking in here is a bit ugly. Does the lock actually need to be held while
> the timer is being bumped?
>
> And what is the upper bound on that while loop?

Yes, I agree that it is ugly. And on top of that, we are holding the timer lock
which is an irq version. As to the need to hold it through the bump, we want
the timers expire time to match what it should given the clock setting we are
using, otherwise, the clock_was_set() code could come through and change that
under us. (Not a pretty sight.)

Now, the bounding on the while, :( it depends on a) the interval and b) how far
the clock was moved. The shorter the interval or the longer the time interval,
the more we loop. I suppose the best thing to do here is an div, but will, most
of the time, take longer.
>
>
>> tmr->it_id = (timer_t)-1;
>>+ INIT_LIST_HEAD(&tmr->abs_timer_entry);
>> if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
>
>
> The cat ate your tab key? ;)

Oh, shit. And I thought I had the cat locked away in the back room. ;)
>
>
>>+ if (!list_empty(&timr->abs_timer_entry)) {
>>+ spin_lock(&abs_list.lock);
>>+ list_del_init(&timr->abs_timer_entry);
>>+ spin_unlock(&abs_list.lock);
>>+ }
>
>
> This is repeated often. Does it merit its own function?

Ok.
>
>
>>+static DECLARE_MUTEX(clock_was_set_lock);
>>+#define mutex_enter(x) down(x)
>>+#define mutex_enter_interruptable(x) down_interruptible(x)
>>+#define mutex_exit(x) up(x)
>
>
> Please open-code these operations.

Eh? Seems funny that we have a definition for mutex (DECLARE_MUTEX) and don't
have the defines to use them. But, ok.
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-29 04:49:38

by George Anzinger

[permalink] [raw]
Subject: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.6.6-kbdw/include/linux/posix-timers.h linux/include/linux/posix-timers.h
--- linux-2.6.6-kbdw/include/linux/posix-timers.h 2003-11-10 17:09:04.000000000 -0800
+++ linux/include/linux/posix-timers.h 2004-06-28 19:16:44.000000000 -0700
@@ -1,8 +1,16 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H

+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+struct k_clock_abs {
+ struct list_head list;
+ spinlock_t lock;
+};
struct k_clock {
int res; /* in nano seconds */
+ struct k_clock_abs *abs_struct;
int (*clock_set) (struct timespec * tp);
int (*clock_get) (struct timespec * tp);
int (*nsleep) (int flags,
@@ -23,8 +31,14 @@
#define posix_time_before(timer, now) \
time_before((timer)->expires, (now)->jiffies)

-#define posix_bump_timer(timr) do { \
- (timr)->it_timer.expires += (timr)->it_incr; \
- (timr)->it_overrun++; \
- }while (0)
+#define posix_bump_timer(timr, now) \
+ do { \
+ long delta, orun; \
+ delta = now.jiffies - (timr)->it_timer.expires; \
+ if (delta >= 0) { \
+ orun = 1 + (delta / (timr)->it_incr); \
+ (timr)->it_timer.expires += orun * (timr)->it_incr; \
+ (timr)->it_overrun += orun; \
+ } \
+ }while (0)
#endif
diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.6.6-kbdw/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.6-kbdw/include/linux/sched.h 2004-05-11 17:39:10.000000000 -0700
+++ linux/include/linux/sched.h 2004-06-14 17:40:35.000000000 -0700
@@ -342,6 +342,8 @@
struct task_struct *it_process; /* process to send signal to */
struct timer_list it_timer;
struct sigqueue *sigq; /* signal queue entry. */
+ struct list_head abs_timer_entry; /* clock abs_timer_list */
+ struct timespec wall_to_prev; /* wall_to_monotonic used when set */
};


diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.6.6-kbdw/kernel/posix-timers.c linux/kernel/posix-timers.c
--- linux-2.6.6-kbdw/kernel/posix-timers.c 2004-05-11 17:39:11.000000000 -0700
+++ linux/kernel/posix-timers.c 2004-06-28 17:18:30.000000000 -0700
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista Software.
*
+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ * Copyright (C) 2004 Boris Hu
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
@@ -41,6 +44,7 @@
#include <linux/idr.h>
#include <linux/posix-timers.h>
#include <linux/wait.h>
+#include <linux/workqueue.h>

#ifndef div_long_long_rem
#include <asm/div64.h>
@@ -169,6 +173,12 @@
*/

static struct k_clock posix_clocks[MAX_CLOCKS];
+/*
+ * We only have one real clock that can be set so we need only one abs list,
+ * even if we should want to have several clocks with differing resolutions.
+ */
+static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list),
+ .lock = SPIN_LOCK_UNLOCKED};

#define if_clock_do(clock_fun,alt_fun,parms) \
(!clock_fun) ? alt_fun parms : clock_fun parms
@@ -200,8 +210,11 @@
*/
static __init int init_posix_timers(void)
{
- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+ .abs_struct = &abs_list
+ };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
+ .abs_struct = NULL,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
};
@@ -212,7 +225,6 @@
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0, 0, 0);
idr_init(&posix_timers_id);
-
return 0;
}

@@ -239,19 +251,92 @@
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}

+/*
+ * This function adjusts the timer as needed as a result of the clock
+ * being set. It should only be called for absolute timers, and then
+ * under the abs_list lock. It computes the time difference and sets
+ * the new jiffies value in the timer. It also updates the timers
+ * reference wall_to_monotonic value. It is complicated by the fact
+ * that tstojiffies() only handles positive times and it needs to work
+ * with both positive and negative times. Also, for negative offsets,
+ * we need to defeat the res round up.
+ *
+ * Return is true if there is a new time, else false.
+ */
+static long add_clockset_delta(struct k_itimer *timr,
+ struct timespec *new_wall_to)
+{
+ struct timespec delta;
+ int sign = 0;
+ u64 exp;
+
+ set_normalized_timespec(&delta,
+ new_wall_to->tv_sec -
+ timr->wall_to_prev.tv_sec,
+ new_wall_to->tv_nsec -
+ timr->wall_to_prev.tv_nsec);
+ if (likely(!(delta.tv_sec | delta.tv_nsec)))
+ return 0;
+ if (delta.tv_sec < 0) {
+ set_normalized_timespec(&delta,
+ -delta.tv_sec,
+ 1 - delta.tv_nsec -
+ posix_clocks[timr->it_clock].res);
+ sign++;
+ }
+ tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp);
+ timr->wall_to_prev = *new_wall_to;
+ timr->it_timer.expires += (sign ? -exp : exp);
+ return 1;
+}
+
+static void remove_from_abslist(struct k_itimer *timr)
+{
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ list_del_init(&timr->abs_timer_entry);
+ spin_unlock(&abs_list.lock);
+ }
+}
+
static void schedule_next_timer(struct k_itimer *timr)
{
+ struct timespec new_wall_to;
struct now_struct now;
+ unsigned long seq;

- /* Set up the timer for the next interval (if there is one) */
+ /*
+ * Set up the timer for the next interval (if there is one).
+ * Note: this code uses the abs_timer_lock to protect
+ * wall_to_prev and must hold it until exp is set, not exactly
+ * obvious...
+
+ * This function is used for CLOCK_REALTIME* and
+ * CLOCK_MONOTONIC* timers. If we ever want to handle other
+ * CLOCKs, the calling code (do_schedule_next_timer) would need
+ * to pull the "clock" info from the timer and dispatch the
+ * "other" CLOCKs "next timer" code (which, I suppose should
+ * also be added to the k_clock structure).
+ */
if (!timr->it_incr)
return;

- posix_get_now(&now);
do {
- posix_bump_timer(timr);
- }while (posix_time_before(&timr->it_timer, &now));
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ posix_get_now(&now);
+ } while (read_seqretry(&xtime_lock, seq));
+
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ add_clockset_delta(timr, &new_wall_to);
+
+ posix_bump_timer(timr, now);

+ spin_unlock(&abs_list.lock);
+ } else {
+ posix_bump_timer(timr, now);
+ }
timr->it_overrun_last = timr->it_overrun;
timr->it_overrun = -1;
++timr->it_requeue_pending;
@@ -312,7 +397,15 @@

memset(&timr->sigq->info, 0, sizeof(siginfo_t));

- /* Send signal to the process that owns this timer. */
+ /*
+ * Send signal to the process that owns this timer.
+
+ * This code assumes that all the possible abs_lists share the
+ * same lock (there is only one list at this time). If this is
+ * not the case, the CLOCK info would need to be used to find
+ * the proper abs list lock.
+ */
+
timr->sigq->info.si_signo = timr->it_sigev_signo;
timr->sigq->info.si_errno = 0;
timr->sigq->info.si_code = SI_TIMER;
@@ -320,6 +413,9 @@
timr->sigq->info.si_value = timr->it_sigev_value;
if (timr->it_incr)
timr->sigq->info.si_sys_private = ++timr->it_requeue_pending;
+ else {
+ remove_from_abslist(timr);
+ }

if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
if (unlikely(timr->it_process->flags & PF_EXITING)) {
@@ -350,16 +446,51 @@
* This function gets called when a POSIX.1b interval timer expires. It
* is used as a callback from the kernel internal timer. The
* run_timer_list code ALWAYS calls with interrutps on.
+
+ * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
*/
static void posix_timer_fn(unsigned long __data)
{
struct k_itimer *timr = (struct k_itimer *) __data;
unsigned long flags;
+ unsigned long seq;
+ struct timespec delta, new_wall_to;
+ u64 exp = 0;
+ int do_notify = 1;

spin_lock_irqsave(&timr->it_lock, flags);
set_timer_inactive(timr);
- timer_notify_task(timr);
- unlock_timer(timr, flags);
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+ set_normalized_timespec(&delta,
+ new_wall_to.tv_sec -
+ timr->wall_to_prev.tv_sec,
+ new_wall_to.tv_nsec -
+ timr->wall_to_prev.tv_nsec);
+ if (likely((delta.tv_sec | delta.tv_nsec ) == 0)) {
+ /* do nothing, timer is on time */
+ } else if (delta.tv_sec < 0) {
+ /* do nothing, timer is already late */
+ } else {
+ /* timer is early due to a clock set */
+ tstojiffie(&delta,
+ posix_clocks[timr->it_clock].res,
+ &exp);
+ timr->wall_to_prev = new_wall_to;
+ timr->it_timer.expires += exp;
+ add_timer(&timr->it_timer);
+ do_notify = 0;
+ }
+ spin_unlock(&abs_list.lock);
+
+ }
+ if (do_notify)
+ timer_notify_task(timr);
+ unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */
}


@@ -398,6 +529,7 @@
return tmr;
memset(tmr, 0, sizeof (struct k_itimer));
tmr->it_id = (timer_t)-1;
+ INIT_LIST_HEAD(&tmr->abs_timer_entry);
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
tmr = 0;
@@ -626,8 +758,7 @@
if (expires) {
if (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- while (posix_time_before(&timr->it_timer, &now))
- posix_bump_timer(timr);
+ posix_bump_timer(timr, now);
expires = timr->it_timer.expires;
}
else
@@ -703,11 +834,10 @@
* time to it to get the proper time for the timer.
*/
static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
- int abs, u64 *exp)
+ int abs, u64 *exp, struct timespec *wall_to)
{
struct timespec now;
struct timespec oc = *tp;
- struct timespec wall_to_mono;
u64 jiffies_64_f;
int rtn =0;

@@ -715,15 +845,15 @@
/*
* The mask pick up the 4 basic clocks
*/
- if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) {
+ if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) {
jiffies_64_f = do_posix_clock_monotonic_gettime_parts(
- &now, &wall_to_mono);
+ &now, wall_to);
/*
* If we are doing a MONOTONIC clock
*/
if((clock - &posix_clocks[0]) & CLOCKS_MONO){
- now.tv_sec += wall_to_mono.tv_sec;
- now.tv_nsec += wall_to_mono.tv_nsec;
+ now.tv_sec += wall_to->tv_sec;
+ now.tv_nsec += wall_to->tv_nsec;
}
} else {
/*
@@ -813,6 +943,8 @@
#else
del_timer(&timr->it_timer);
#endif
+ remove_from_abslist(timr);
+
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -827,24 +959,25 @@

if (adjust_abs_time(clock,
&new_setting->it_value, flags & TIMER_ABSTIME,
- &expire_64)) {
+ &expire_64, &(timr->wall_to_prev))) {
return -EINVAL;
}
timr->it_timer.expires = (unsigned long)expire_64;
tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
timr->it_incr = (unsigned long)expire_64;

-
/*
- * For some reason the timer does not fire immediately if expires is
- * equal to jiffies, so the timer notify function is called directly.
- * We do not even queue SIGEV_NONE timers!
+ * We do not even queue SIGEV_NONE timers! But we do put them
+ * in the abs list so we can do that right.
*/
- if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) {
- if (timr->it_timer.expires == jiffies)
- timer_notify_task(timr);
- else
- add_timer(&timr->it_timer);
+ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
+ add_timer(&timr->it_timer);
+
+ if (flags & TIMER_ABSTIME && clock->abs_struct) {
+ spin_lock(&clock->abs_struct->lock);
+ list_add_tail(&(timr->abs_timer_entry),
+ &(clock->abs_struct->list));
+ spin_unlock(&clock->abs_struct->lock);
}
return 0;
}
@@ -878,7 +1011,7 @@
if (!posix_clocks[timr->it_clock].timer_set)
error = do_timer_settime(timr, flags, &new_spec, rtn);
else
- error = posix_clocks[timr->it_clock].timer_set(timr,
+ error = posix_clocks[timr->it_clock].timer_set(timr,
flags,
&new_spec, rtn);
unlock_timer(timr, flag);
@@ -911,6 +1044,8 @@
#else
del_timer(&timer->it_timer);
#endif
+ remove_from_abslist(timer);
+
return 0;
}

@@ -1153,13 +1288,93 @@
* On locking, clock_was_set() is called from update_wall_clock which
* holds (or has held for it) a write_lock_irq( xtime_lock) and is
* called from the timer bh code. Thus we need the irq save locks.
+ *
+ * Also, on the call from update_wall_clock, that is done as part of a
+ * softirq thing. We don't want to delay the system that much (possibly
+ * long list of timers to fix), so we defer that work to keventd.
*/

static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
+static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL);
+
+static DECLARE_MUTEX(clock_was_set_lock);

void clock_was_set(void)
{
+ struct k_itimer *timr;
+ struct timespec new_wall_to;
+ LIST_HEAD(cws_list);
+ unsigned long seq;
+
+
+ if (unlikely(in_interrupt())) {
+ schedule_work(&clock_was_set_work);
+ return;
+ }
wake_up_all(&nanosleep_abs_wqueue);
+
+ /*
+ * Check if there exist TIMER_ABSTIME timers to correct.
+ *
+ * Notes on locking: This code is run in task context with irq
+ * on. We CAN be interrupted! All other usage of the abs list
+ * lock is under the timer lock which holds the irq lock as
+ * well. We REALLY don't want to scan the whole list with the
+ * interrupt system off, AND we would like a sequence lock on
+ * this code as well. Since we assume that the clock will not
+ * be set often, it seems ok to take and release the irq lock
+ * for each timer. In fact add_timer will do this, so this is
+ * not an issue. So we know when we are done, we will move the
+ * whole list to a new location. Then as we process each entry,
+ * we will move it to the actual list again. This way, when our
+ * copy is empty, we are done. We are not all that concerned
+ * about preemption so we will use a semaphore lock to protect
+ * aginst reentry. This way we will not stall another
+ * processor. It is possible that this may delay some timers
+ * that should have expired, given the new clock, but even this
+ * will be minimal as we will always update to the current time,
+ * even if it was set by a task that is waiting for entry to
+ * this code. Timers that expire too early will be caught by
+ * the expire code and restarted.
+
+ * Absolute timers that repeat are left in the abs list while
+ * waiting for the task to pick up the signal. This means we
+ * may find timers that are not in the "add_timer" list, but are
+ * in the abs list. We do the same thing for these, save
+ * putting them back in the "add_timer" list. (Note, these are
+ * left in the abs list mainly to indicate that they are
+ * ABSOLUTE timers, a fact that is used by the re-arm code, and
+ * for which we have no other flag.)
+
+ */
+
+ down(&clock_was_set_lock);
+ spin_lock_irq(&abs_list.lock);
+ list_splice_init(&abs_list.list, &cws_list);
+ spin_unlock_irq(&abs_list.lock);
+ do {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ spin_lock_irq(&abs_list.lock);
+ if (list_empty(&cws_list)) {
+ spin_unlock_irq(&abs_list.lock);
+ break;
+ }
+ timr = list_entry(cws_list.next, struct k_itimer,
+ abs_timer_entry);
+
+ list_del_init(&timr->abs_timer_entry);
+ if (add_clockset_delta(timr, &new_wall_to) &&
+ del_timer(&timr->it_timer)) /* timer run yet? */
+ add_timer(&timr->it_timer);
+ list_add(&timr->abs_timer_entry, &abs_list.list);
+ spin_unlock_irq(&abs_list.lock);
+ } while (1);
+
+ up(&clock_was_set_lock);
}

long clock_nanosleep_restart(struct restart_block *restart_block);
@@ -1202,7 +1417,7 @@
long
do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
{
- struct timespec t;
+ struct timespec t, dum;
struct timer_list new_timer;
DECLARE_WAITQUEUE(abs_wqueue, current);
u64 rq_time = (u64)0;
@@ -1242,7 +1457,7 @@
t = *tsave;
if (abs || !rq_time) {
adjust_abs_time(&posix_clocks[which_clock], &t, abs,
- &rq_time);
+ &rq_time, &dum);
rq_time += (t.tv_sec || t.tv_nsec);
}

Binary files linux-2.6.6-kbdw/scripts/bin2c and linux/scripts/bin2c differ


Attachments:
hrtimer-abs-2.6.6-1.3.patch (16.59 kB)

2004-06-29 04:59:13

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

George Anzinger <[email protected]> wrote:
>
> Ok, I think this does it.

patching file kernel/posix-timers.c
Hunk #10 FAILED at 529.
Hunk #11 succeeded at 776 (offset 18 lines).
Hunk #13 succeeded at 863 (offset 18 lines).
Hunk #15 succeeded at 977 (offset 18 lines).
Hunk #17 succeeded at 1062 (offset 18 lines).
Hunk #19 succeeded at 1435 (offset 18 lines).
1 out of 20 hunks FAILED -- saving rejects to file kernel/posix-timers.c.rej

I fixed that up - please test next -mm, check that it all works?

+ do {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ spin_lock_irq(&abs_list.lock);
+ if (list_empty(&cws_list)) {
+ spin_unlock_irq(&abs_list.lock);
+ break;
+ }
+ timr = list_entry(cws_list.next, struct k_itimer,
+ abs_timer_entry);
+
+ list_del_init(&timr->abs_timer_entry);
+ if (add_clockset_delta(timr, &new_wall_to) &&
+ del_timer(&timr->it_timer)) /* timer run yet? */
+ add_timer(&timr->it_timer);
+ list_add(&timr->abs_timer_entry, &abs_list.list);
+ spin_unlock_irq(&abs_list.lock);
+ } while (1);

nanonit:

for ( ; ; ) {
...
}

is more readable than

do {
...
} while (1);

because you can see what it's doing as the eye enters the code...

2004-06-29 08:45:55

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

Andrew Morton wrote:
> George Anzinger <[email protected]> wrote:
>
>>Ok, I think this does it.
>
>
> patching file kernel/posix-timers.c
> Hunk #10 FAILED at 529.
> Hunk #11 succeeded at 776 (offset 18 lines).
> Hunk #13 succeeded at 863 (offset 18 lines).
> Hunk #15 succeeded at 977 (offset 18 lines).
> Hunk #17 succeeded at 1062 (offset 18 lines).
> Hunk #19 succeeded at 1435 (offset 18 lines).
> 1 out of 20 hunks FAILED -- saving rejects to file kernel/posix-timers.c.rej
>
> I fixed that up - please test next -mm, check that it all works?

Will do.
>
> + do {
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + new_wall_to = wall_to_monotonic;
> + } while (read_seqretry(&xtime_lock, seq));
> +
> + spin_lock_irq(&abs_list.lock);
> + if (list_empty(&cws_list)) {
> + spin_unlock_irq(&abs_list.lock);
> + break;
> + }
> + timr = list_entry(cws_list.next, struct k_itimer,
> + abs_timer_entry);
> +
> + list_del_init(&timr->abs_timer_entry);
> + if (add_clockset_delta(timr, &new_wall_to) &&
> + del_timer(&timr->it_timer)) /* timer run yet? */
> + add_timer(&timr->it_timer);
> + list_add(&timr->abs_timer_entry, &abs_list.list);
> + spin_unlock_irq(&abs_list.lock);
> + } while (1);
>
> nanonit:
>
> for ( ; ; ) {
> ...
> }
>
> is more readable than
>
> do {
> ...
> } while (1);
>
> because you can see what it's doing as the eye enters the code...
>
Noted, thanks.



--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2004-06-30 19:14:08

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] Bugfix for CLOCK_REALTIME absolute timer.

Andrew Morton wrote:
~

> I fixed that up - please test next -mm, check that it all works?

It passes all my tests. Boris gives a thumbs up also.


--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml