New rules:
->_base & 1 : is timer pending
->_base & ~1 : timer's base
If ->_base == NULL, this timer is not running on any cpu.
Cleared only in del_timer_sync().
Note that now we don't need del_singleshot_timer_sync().
Here is the code of del_timer_sync:
int del_timer_sync(struct timer_list *timer)
{
int ret = 0;
for (;;) {
unsigned long flags;
tvec_base_t *_base, *base;
_base = timer->_base;
if (!_base)
return ret;
base = __timer_base(_base);
spin_lock_irqsave(&base->lock, flags);
if (timer->_base != _base)
goto unlock;
if (base->running_timer == timer)
goto unlock;
if (__timer_pending(_base)) {
list_del(&timer->entry);
ret = 1;
}
smp_wmb();
timer->_base = NULL;
unlock:
spin_unlock_irqrestore(&base->lock, flags);
}
}
Signed-off-by: Oleg Nesterov <[email protected]>
--- 2.6.11/include/linux/timer.h~2_PEND 2005-03-15 17:49:19.000000000 +0300
+++ 2.6.11/include/linux/timer.h 2005-03-15 18:09:06.000000000 +0300
@@ -21,9 +21,11 @@ struct timer_list {
struct tvec_t_base_s *_base;
};
+#define __TIMER_PENDING 1
+
static inline int __timer_pending(struct tvec_t_base_s *base)
{
- return base != NULL;
+ return ((unsigned long)base & __TIMER_PENDING) != 0;
}
#define TIMER_MAGIC 0x4b87ad6e
--- 2.6.11/kernel/timer.c~2_PEND 2005-03-15 17:55:00.000000000 +0300
+++ 2.6.11/kernel/timer.c 2005-03-15 19:52:48.000000000 +0300
@@ -86,16 +86,26 @@ static inline void set_running_timer(tve
static inline tvec_base_t *__get_base(struct timer_list *timer)
{
- return timer->_base;
+ tvec_base_t *base = timer->_base;
+
+ if (__timer_pending(base))
+ return (void*)base - __TIMER_PENDING;
+ else
+ return NULL;
}
static inline void __set_base(struct timer_list *timer,
tvec_base_t *base, int pending)
{
if (pending)
- timer->_base = base;
+ timer->_base = (void*)base + __TIMER_PENDING;
else
- timer->_base = NULL;
+ timer->_base = base;
+}
+
+static inline tvec_base_t *__timer_base(tvec_base_t *base)
+{
+ return (tvec_base_t*)((unsigned long)base & ~__TIMER_PENDING);
}
/* Fake initialization */
@@ -356,29 +366,39 @@ EXPORT_SYMBOL(del_timer);
*/
int del_timer_sync(struct timer_list *timer)
{
- tvec_base_t *base;
- int i, ret = 0;
+ int ret;
check_timer(timer);
-del_again:
- ret += del_timer(timer);
+ ret = 0;
+ for (;;) {
+ unsigned long flags;
+ tvec_base_t *_base, *base;
+
+ _base = timer->_base;
+ if (!_base)
+ return ret;
- for_each_online_cpu(i) {
- base = &per_cpu(tvec_bases, i);
- if (base->running_timer == timer) {
- while (base->running_timer == timer) {
- cpu_relax();
- preempt_check_resched();
- }
- break;
+ base = __timer_base(_base);
+ spin_lock_irqsave(&base->lock, flags);
+
+ if (timer->_base != _base)
+ goto unlock;
+
+ if (base->running_timer == timer)
+ goto unlock;
+
+ if (__timer_pending(_base)) {
+ list_del(&timer->entry);
+ ret = 1;
}
+ /* Need to make sure that anybody who sees a NULL base
+ * also sees the list ops */
+ smp_wmb();
+ timer->_base = NULL;
+unlock:
+ spin_unlock_irqrestore(&base->lock, flags);
}
- smp_rmb();
- if (timer_pending(timer))
- goto del_again;
-
- return ret;
}
EXPORT_SYMBOL(del_timer_sync);
* Oleg Nesterov <[email protected]> wrote:
> New rules:
> ->_base & 1 : is timer pending
> ->_base & ~1 : timer's base
how would it look like if we had a separate timer->pending field after
all? Would it be faster/cleaner?
(we dont need to keep them small _that_ bad - if there's a good reason
we should rather add a clean new field than to encode two fields into
one field and complicate the code.)
Ingo
Ingo Molnar wrote:
>
> * Oleg Nesterov <[email protected]> wrote:
>
> > New rules:
> > ->_base & 1 : is timer pending
> > ->_base & ~1 : timer's base
>
> how would it look like if we had a separate timer->pending field after
> all? Would it be faster/cleaner?
The only change visible outside kernel/timer.c is:
static inline int timer_pending(const struct timer_list * timer)
{
- return timer->base != NULL;
+ return timer->base & 1;
}
Currently __get_base() usage in the kernel/time.c suboptimal and
should be cleanuped, I see no other problems with performance.
> (we dont need to keep them small _that_ bad - if there's a good reason
> we should rather add a clean new field than to encode two fields into
> one field and complicate the code.)
I think that separate timer->pending field will require more changes,
because we can't read/write base+pending atomically.
int del_timer()
{
again:
if (!timer->pending) // not strictly necessary, but it is
return 0; // nice to avoid locking
base = timer->base;
if (!base)
return 0;
spin_lock(base->lock);
if (!timer->pending) {
spin_unlock();
goto again;
}
if (timer->base != base) {
spin_unlock();
goto again;
}
....
}
Note also, that we have to audit every timer->base usage anyway,
because currently it mix base and pending.
But may be you are right, the encoding of a bit in a pointer is
indeed weird.
Oleg.
* Oleg Nesterov <[email protected]> wrote:
> I think that separate timer->pending field will require more changes,
> because we can't read/write base+pending atomically.
i think that's the killer argument in favor of the bit-trick. Being able
to read/write base+pending atomically is a good excuse. Using less RAM
alone is not necessarily a good excuse.
Andrew, could we give Oleg's 2 patches a whirl in -mm?
Ingo