2013-06-12 12:06:52

by Kirill Tkhai

Subject: [PATCH] spin_unlock*_no_resched()

There are many constructions like:

spin_unlock_irq(lock);
schedule();

In a preemptible kernel we check whether the task needs to be
rescheduled at the end of spin_unlock(). So if TIF_NEED_RESCHED is
set, we end up calling schedule() twice and pay a small overhead for
it. Add primitives to avoid these situations.
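
As an illustration (not part of the diff below), a typical caller
could then be converted from:

	spin_unlock_irq(lock);
	schedule();

to:

	/* skip the preemption check in the unlock, reschedule explicitly */
	spin_unlock_irq_no_resched(lock);
	schedule();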

Signed-off-by: Kirill Tkhai <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Peter Zijlstra <[email protected]>
---
include/linux/spinlock.h | 27 +++++++++++++++++++++++++++
include/linux/spinlock_api_smp.h | 37 +++++++++++++++++++++++++++++++++++++
include/linux/spinlock_api_up.h | 13 +++++++++++++
kernel/spinlock.c | 20 ++++++++++++++++++++
4 files changed, 97 insertions(+), 0 deletions(-)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 7d537ce..35caa32 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -221,13 +221,24 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock)
#define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock)
#define raw_spin_unlock(lock) _raw_spin_unlock(lock)
+#define raw_spin_unlock_no_resched(lock) \
+ _raw_spin_unlock_no_resched(lock)
+
#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
+#define raw_spin_unlock_irq_no_resched(lock) \
+ _raw_spin_unlock_irq_no_resched(lock)

#define raw_spin_unlock_irqrestore(lock, flags) \
do { \
typecheck(unsigned long, flags); \
_raw_spin_unlock_irqrestore(lock, flags); \
} while (0)
+#define raw_spin_unlock_irqrestore_no_resched(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _raw_spin_unlock_irqrestore_no_resched(lock, flags); \
+ } while (0)
+
#define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock)

#define raw_spin_trylock_bh(lock) \
@@ -325,6 +336,11 @@ static inline void spin_unlock(spinlock_t *lock)
raw_spin_unlock(&lock->rlock);
}

+static inline void spin_unlock_no_resched(spinlock_t *lock)
+{
+ raw_spin_unlock_no_resched(&lock->rlock);
+}
+
static inline void spin_unlock_bh(spinlock_t *lock)
{
raw_spin_unlock_bh(&lock->rlock);
@@ -335,11 +351,22 @@ static inline void spin_unlock_irq(spinlock_t *lock)
raw_spin_unlock_irq(&lock->rlock);
}

+static inline void spin_unlock_irq_no_resched(spinlock_t *lock)
+{
+ raw_spin_unlock_irq_no_resched(&lock->rlock);
+}
+
static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
raw_spin_unlock_irqrestore(&lock->rlock, flags);
}

+static inline void spin_unlock_irqrestore_no_resched(spinlock_t *lock,
+ unsigned long flags)
+{
+ raw_spin_unlock_irqrestore_no_resched(&lock->rlock, flags);
+}
+
static inline int spin_trylock_bh(spinlock_t *lock)
{
return raw_spin_trylock_bh(&lock->rlock);
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 51df117..541d371 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -37,11 +37,18 @@ _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass)
int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock);
int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock);
void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
+void __lockfunc
+_raw_spin_unlock_no_resched(raw_spinlock_t *lock) __releases(lock);
void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) __releases(lock);
void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock) __releases(lock);
void __lockfunc
+_raw_spin_unlock_irq_no_resched(raw_spinlock_t *lock) __releases(lock);
+void __lockfunc
_raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
__releases(lock);
+void __lockfunc
+_raw_spin_unlock_irqrestore_no_resched(raw_spinlock_t *lock, unsigned long flags)
+ __releases(lock);

#ifdef CONFIG_INLINE_SPIN_LOCK
#define _raw_spin_lock(lock) __raw_spin_lock(lock)
@@ -69,6 +76,8 @@ _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)

#ifndef CONFIG_UNINLINE_SPIN_UNLOCK
#define _raw_spin_unlock(lock) __raw_spin_unlock(lock)
+#define _raw_spin_unlock_no_resched(lock) \
+ __raw_spin_unlock_no_resched(lock)
#endif

#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH
@@ -77,10 +86,14 @@ _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)

#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ
#define _raw_spin_unlock_irq(lock) __raw_spin_unlock_irq(lock)
+#define _raw_spin_unlock_irq_no_resched(lock) \
+ __raw_spin_unlock_irq_no_resched(lock)
#endif

#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
#define _raw_spin_unlock_irqrestore(lock, flags) __raw_spin_unlock_irqrestore(lock, flags)
+#define _raw_spin_unlock_irqrestore_no_resched(lock, flags) \
+ __raw_spin_unlock_irqrestore_no_resched(lock, flags)
#endif

static inline int __raw_spin_trylock(raw_spinlock_t *lock)
@@ -153,6 +166,13 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
preempt_enable();
}

+static inline void __raw_spin_unlock_no_resched(raw_spinlock_t *lock)
+{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
+ do_raw_spin_unlock(lock);
+ preempt_enable_no_resched();
+}
+
static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
unsigned long flags)
{
@@ -162,6 +182,15 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
preempt_enable();
}

+static inline void __raw_spin_unlock_irqrestore_no_resched(raw_spinlock_t *lock,
+ unsigned long flags)
+{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
+ do_raw_spin_unlock(lock);
+ local_irq_restore(flags);
+ preempt_enable_no_resched();
+}
+
static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
{
spin_release(&lock->dep_map, 1, _RET_IP_);
@@ -170,6 +199,14 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
preempt_enable();
}

+static inline void __raw_spin_unlock_irq_no_resched(raw_spinlock_t *lock)
+{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
+ do_raw_spin_unlock(lock);
+ local_irq_enable();
+ preempt_enable_no_resched();
+}
+
static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock)
{
spin_release(&lock->dep_map, 1, _RET_IP_);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index af1f472..6f78980 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -39,6 +39,9 @@
#define __UNLOCK(lock) \
do { preempt_enable(); __release(lock); (void)(lock); } while (0)

+#define __UNLOCK_NO_RESCHED(lock) \
+ do { preempt_enable_no_resched(); __release(lock); (void)(lock); } while (0)
+
#define __UNLOCK_BH(lock) \
do { preempt_enable_no_resched(); local_bh_enable(); \
__release(lock); (void)(lock); } while (0)
@@ -46,9 +49,15 @@
#define __UNLOCK_IRQ(lock) \
do { local_irq_enable(); __UNLOCK(lock); } while (0)

+#define __UNLOCK_IRQ_NO_RESCHED(lock) \
+ do { local_irq_enable(); __UNLOCK_NO_RESCHED(lock); } while (0)
+
#define __UNLOCK_IRQRESTORE(lock, flags) \
do { local_irq_restore(flags); __UNLOCK(lock); } while (0)

+#define __UNLOCK_IRQRESTORE_NO_RESCHED(lock, flags) \
+ do { local_irq_restore(flags); __UNLOCK_NO_RESCHED(lock); } while (0)
+
#define _raw_spin_lock(lock) __LOCK(lock)
#define _raw_spin_lock_nested(lock, subclass) __LOCK(lock)
#define _raw_read_lock(lock) __LOCK(lock)
@@ -67,16 +76,20 @@
#define _raw_write_trylock(lock) ({ __LOCK(lock); 1; })
#define _raw_spin_trylock_bh(lock) ({ __LOCK_BH(lock); 1; })
#define _raw_spin_unlock(lock) __UNLOCK(lock)
+#define _raw_spin_unlock_no_resched(lock) __UNLOCK_NO_RESCHED(lock)
#define _raw_read_unlock(lock) __UNLOCK(lock)
#define _raw_write_unlock(lock) __UNLOCK(lock)
#define _raw_spin_unlock_bh(lock) __UNLOCK_BH(lock)
#define _raw_write_unlock_bh(lock) __UNLOCK_BH(lock)
#define _raw_read_unlock_bh(lock) __UNLOCK_BH(lock)
#define _raw_spin_unlock_irq(lock) __UNLOCK_IRQ(lock)
+#define _raw_spin_unlock_irq_no_resched(lock) __UNLOCK_IRQ_NO_RESCHED(lock)
#define _raw_read_unlock_irq(lock) __UNLOCK_IRQ(lock)
#define _raw_write_unlock_irq(lock) __UNLOCK_IRQ(lock)
#define _raw_spin_unlock_irqrestore(lock, flags) \
__UNLOCK_IRQRESTORE(lock, flags)
+#define _raw_spin_unlock_irqrestore_no_resched(lock, flags) \
+ __UNLOCK_IRQRESTORE_NO_RESCHED(lock, flags)
#define _raw_read_unlock_irqrestore(lock, flags) \
__UNLOCK_IRQRESTORE(lock, flags)
#define _raw_write_unlock_irqrestore(lock, flags) \
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 5cdd806..c52bdf7 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -169,6 +169,12 @@ void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
__raw_spin_unlock(lock);
}
EXPORT_SYMBOL(_raw_spin_unlock);
+
+void __lockfunc _raw_spin_unlock_no_resched(raw_spinlock_t *lock)
+{
+ __raw_spin_unlock_no_resched(lock);
+}
+EXPORT_SYMBOL(_raw_spin_unlock_no_resched);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
@@ -177,6 +183,13 @@ void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long
__raw_spin_unlock_irqrestore(lock, flags);
}
EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
+
+void __lockfunc _raw_spin_unlock_irqrestore_no_resched(raw_spinlock_t *lock,
+ unsigned long flags)
+{
+ __raw_spin_unlock_irqrestore_no_resched(lock, flags);
+}
+EXPORT_SYMBOL(_raw_spin_unlock_irqrestore_no_resched);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
@@ -185,6 +198,13 @@ void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
__raw_spin_unlock_irq(lock);
}
EXPORT_SYMBOL(_raw_spin_unlock_irq);
+
+void __lockfunc _raw_spin_unlock_irq_no_resched(raw_spinlock_t *lock)
+{
+ __raw_spin_unlock_irq_no_resched(lock);
+}
+EXPORT_SYMBOL(_raw_spin_unlock_irq_no_resched);
+
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH


2013-06-12 12:15:38

by Peter Zijlstra

Subject: Re: [PATCH] spin_unlock*_no_resched()

On Wed, Jun 12, 2013 at 04:06:47PM +0400, Kirill Tkhai wrote:
> There are many constructions like:
>
> spin_unlock_irq(lock);
> schedule();
>
> In a preemptible kernel we check whether the task needs to be
> rescheduled at the end of spin_unlock(). So if TIF_NEED_RESCHED is
> set, we end up calling schedule() twice and pay a small overhead for
> it. Add primitives to avoid these situations.
>
> Signed-off-by: Kirill Tkhai <[email protected]>
> CC: Steven Rostedt <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Peter Zijlstra <[email protected]>
> ---
> include/linux/spinlock.h | 27 +++++++++++++++++++++++++++
> include/linux/spinlock_api_smp.h | 37 +++++++++++++++++++++++++++++++++++++
> include/linux/spinlock_api_up.h | 13 +++++++++++++
> kernel/spinlock.c | 20 ++++++++++++++++++++
> 4 files changed, 97 insertions(+), 0 deletions(-)
> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
> index 7d537ce..35caa32 100644
> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -221,13 +221,24 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
> #define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock)
> #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock)
> #define raw_spin_unlock(lock) _raw_spin_unlock(lock)
> +#define raw_spin_unlock_no_resched(lock) \
> + _raw_spin_unlock_no_resched(lock)
> +
> #define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock)
> +#define raw_spin_unlock_irq_no_resched(lock) \
> + _raw_spin_unlock_irq_no_resched(lock)
>
> #define raw_spin_unlock_irqrestore(lock, flags) \
> do { \
> typecheck(unsigned long, flags); \
> _raw_spin_unlock_irqrestore(lock, flags); \
> } while (0)
> +#define raw_spin_unlock_irqrestore_no_resched(lock, flags) \
> + do { \
> + typecheck(unsigned long, flags); \
> + _raw_spin_unlock_irqrestore_no_resched(lock, flags); \
> + } while (0)

So I absolutely hate this API because people can (and invariably will)
abuse it; much like they did/do preempt_enable_no_resched().

IIRC Thomas even maps preempt_enable_no_resched() to preempt_enable() in
-rt to make sure we don't miss preemption points due to stupidity.
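
That is, something along the lines of (quoting the -rt tree from memory):

	#define preempt_enable_no_resched()	preempt_enable()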

He converted the 'few' sane sites to use schedule_preempt_disabled(). In
that vein, does it make sense to introduce schedule_spin_locked()?
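
(For reference, schedule_preempt_disabled() is roughly:

	void __sched schedule_preempt_disabled(void)
	{
		sched_preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}

so a lock-side helper would just follow the same pattern.)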

Also, your patch 'fails' to make use of the new API.

2013-06-12 13:07:26

by Steven Rostedt

Subject: Re: [PATCH] spin_unlock*_no_resched()

On Wed, 2013-06-12 at 14:15 +0200, Peter Zijlstra wrote:

> So I absolutely hate this API because people can (and invariably will)
> abuse it; much like they did/do preempt_enable_no_resched().

Me too.

>
> IIRC Thomas even maps preempt_enable_no_resched() to preempt_enable() in
> -rt to make sure we don't miss preemption points due to stupidity.
>
> He converted the 'few' sane sites to use schedule_preempt_disabled(). In
> that vein, does it make sense to introduce schedule_spin_locked()?
>

I was thinking the exact same thing when I read this patch. This is a
strict policy that we should enforce and not let individual developers
implement. Yes, a schedule_spin_unlock() would work nicely. The API will
enforce the two to be used together. Otherwise, I can envision seeing
things like:

preempt_disable();
[...]

spin_lock(x);

spin_unlock_no_resched(x);

[...]

preempt_enable();

And developers having no idea why the above is broken. Although, I would
say the above is broken for other reasons, but I was just using that to
show the craziness such an API would give to us.
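
To be concrete, the paired primitive could be a sketch along these
lines (hypothetical, exact name and semantics to be decided):

	/*
	 * Drop the lock and enter the scheduler in one step; the unlock
	 * skips its preemption check because schedule() follows
	 * unconditionally.
	 */
	static inline void schedule_spin_unlock(spinlock_t *lock)
	{
		spin_unlock_no_resched(lock);
		schedule();
	}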

-- Steve


2013-06-12 23:05:26

by Kirill Tkhai

Subject: Re: [PATCH] spin_unlock*_no_resched()

On Wed, 2013-06-12 at 09:07 -0400, Steven Rostedt wrote:
> On Wed, 2013-06-12 at 14:15 +0200, Peter Zijlstra wrote:
>
> > So I absolutely hate this API because people can (and invariably will)
> > abuse it; much like they did/do preempt_enable_no_resched().
>
> Me too.
>
> >
> > IIRC Thomas even maps preempt_enable_no_resched() to preempt_enable() in
> > -rt to make sure we don't miss preemption points due to stupidity.
> >
> > He converted the 'few' sane sites to use schedule_preempt_disabled(). In
> > that vein, does it make sense to introduce schedule_spin_locked()?
> >
>
> I was thinking the exact same thing when I read this patch. This is a
> strict policy that we should enforce and not let individual developers
> implement. Yes, a schedule_spin_unlock() would work nicely. The API will
> enforce the two to be used together.

Steven, thanks for your explanation, and Peter's too; now I'm looking
at this from another angle.

If we go with a combined primitive, does there have to be a separate
schedule_spin_unlock_* variant for every irq state? The simplest way
would be to always do local_irq_enable() before the schedule() call,
but I'm not sure that is good for all platforms.
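
For instance, an _irq flavour might be something like this (just to
illustrate the question, nothing settled):

	static inline void schedule_spin_unlock_irq(spinlock_t *lock)
	{
		spin_unlock_irq_no_resched(lock);	/* re-enables irqs */
		schedule();
	}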

For -rt all of this is completely useless, because the number of
raw_spin_locks there is small. Maybe changes for some other types of
locks would be applicable.

Kirill

> Otherwise, I can envision seeing
> things like:
>
> preempt_disable();
> [...]
>
> spin_lock(x);
>
> spin_unlock_no_resched(x);
>
> [...]
>
> preempt_enable();
>
> And developers having no idea why the above is broken. Although, I would
> say the above is broken for other reasons, but I was just using that to
> show the craziness such an API would give to us.
>
> -- Steve
>
>
>


2013-06-13 00:00:25

by Kirill Tkhai

Subject: Re: [PATCH] spin_unlock*_no_resched()

On 12/06/13 17:07, Steven Rostedt wrote:
> On Wed, 2013-06-12 at 14:15 +0200, Peter Zijlstra wrote:
>
>> So I absolutely hate this API because people can (and invariably will)
>> abuse it; much like they did/do preempt_enable_no_resched().
>
> Me too.
>
>>
>> IIRC Thomas even maps preempt_enable_no_resched() to preempt_enable() in
>> -rt to make sure we don't miss preemption points due to stupidity.
>>
>> He converted the 'few' sane sites to use schedule_preempt_disabled(). In
>> that vein, does it make sense to introduce schedule_spin_locked()?
>>
>
> I was thinking the exact same thing when I read this patch. This is a
> strict policy that we should enforce and not let individual developers
> implement. Yes, a schedule_spin_unlock() would work nicely. The API will
> enforce the two to be used together. Otherwise, I can envision seeing
> things like:
>
> preempt_disable();
> [...]
>
> spin_lock(x);
>
> spin_unlock_no_resched(x);
>
> [...]
>
> preempt_enable();
>
> And developers having no idea why the above is broken. Although, I would
> say the above is broken for other reasons, but I was just using that to
> show the craziness such an API would give to us.
>
> -- Steve
>
>
>

In addition to my previous letter: if the spin_lock is locked, then
irqs must be disabled. So sorry for the noise.

2013-06-18 10:05:46

by Peter Zijlstra

Subject: Re: [PATCH] spin_unlock*_no_resched()

On Wed, Jun 12, 2013 at 04:06:47PM +0400, Kirill Tkhai wrote:
> There are many constructions like:
>
> spin_unlock_irq(lock);
> schedule();
>
> In a preemptible kernel we check whether the task needs to be
> rescheduled at the end of spin_unlock(). So if TIF_NEED_RESCHED is
> set, we end up calling schedule() twice and pay a small overhead for
> it. Add primitives to avoid these situations.
>

Aside from the excellent comments you received from Steven, please also
include a patch that converts some (if not all) of these many callsites
you found so that there's a user of the new code in the kernel.

That way we can simply build + run the kernel and have more
confidence that things work as intended.