LinuxLists.cc - [PATCH v0] add nano semaphore in kernel

2010-12-26 05:13:45

by Hillf Danton

[permalink] [raw]

Subject: [PATCH v0] add nano semaphore in kernel

Based upon high resolution timer and idea borrowed from semaphore,
nano semaphore is created.

Nano semaphore provides finer time resolution depending on system
configuration and capabilities.

Nano semaphore is not to replace semaphore, but used in application
environments where nano seconds are required.

Three methods, nano_semaphore_try_down, nano_semaphore_down and
nano_semaphore_up are implemented in a header file, and there is no
corresponding C file since nano semaphore is not complex.

Signed-off-by: Hillf Danton <[email protected]>
---

--- a/include/linux/nano_semaphore.h 1970-01-01 08:00:00.000000000 +0800
+++ b/include/linux/nano_semaphore.h 2010-12-26 12:38:36.000000000 +0800
@@ -0,0 +1,263 @@
+/*
+ * linux/nano_semaphore.h
+ *
+ * Definition and implementation of nano semaphore
+ *
+ * Nano semaphore provides finer time resolution depending on system
+ * configuration and capabilities.
+ *
+ * Nano semaphore could be used in parallel with semaphore.
+ *
+ * Started-by: Hillf Danton
+ *
+ * Credits:
+ * ideas are borrowed from semaphore and high resolution timer
+ *
+ * Distributed under the terms of GPL v2
+ */
+
+#ifndef __NANO_SEMAPHORE_H_
+#define __NANO_SEMAPHORE_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+#include <linux/hrtimer.h>
+
+/* must be initialized before use */
+struct nano_semaphore {
+ struct list_head chair;
+ struct task_struct *holder;
+ spinlock_t lock;
+};
+
+#define NANO_SEMAPHORE_INITIALIZER(name) \
+{ \
+ .chair = LIST_HEAD_INIT((name).chair), \
+ .holder = NULL, \
+ .lock = __SPIN_LOCK_UNLOCKED((name).lock), \
+}
+
+
+#define DEFINE_NANO_SEMAPHORE(name) \
+ struct nano_semaphore name = NANO_SEMAPHORE_INITIALIZER(name)
+
+
+static inline void nano_semaphore_init(struct nano_semaphore *s)
+{
+ INIT_LIST_HEAD(&s->chair);
+ s->holder = NULL;
+ spin_lock_init(&s->lock);
+}
+
+/*
+ * Helper functions about nano semaphore
+ */
+
+/*
+ * Helper function to check, whether anyone is waiting the nano semaphore
+ *
+ * Called with lock hold
+ */
+static inline int nano_semaphore_waiter_pending(struct nano_semaphore *s)
+{
+ return !list_empty(&s->chair);
+}
+
+/*
+ * Helper function to check, whether the nano semaphore is not hold by anyone
+ *
+ * Called with lock hold
+ */
+static inline int nano_semaphore_holder_empty(struct nano_semaphore *s)
+{
+ return !s->holder;
+}
+
+/*
+ * Helper function to check, whether `task' is the holder of nano semaphore
+ *
+ * Called with lock hold
+ */
+static inline int
+nano_semaphore_holder_match(struct nano_semaphore *s, struct task_struct *task)
+{
+ return s->holder == task;
+}
+
+/*
+ * Helper function to set `task' to be the holder of nano semaphore
+ *
+ * Called with lock hold
+ */
+static inline void
+nano_semaphore_set_holder(struct nano_semaphore *s, struct task_struct *task)
+{
+ s->holder = task;
+}
+
+/*
+ * Helper function try to acquire the nano semaphore
+ *
+ * Returns 1 if acquired successfully, 0 otherwise.
+ *
+ * Called with lock hold
+ */
+static inline int __nano_semaphore_try_down(struct nano_semaphore *s)
+{
+ int ret;
+
+ ret = !nano_semaphore_waiter_pending(s) &&
+ nano_semaphore_holder_empty(s);
+ if (ret)
+ nano_semaphore_set_holder(s, current);
+
+ return ret;
+}
+
+/*
+ * nano_semaphore_try_down - try to acquire the nano semaphore without waiting
+ * @s: the nano semaphore to be acquired
+ *
+ * Returns 1 if acquired successfully, 0 otherwise.
+ */
+static inline int nano_semaphore_try_down(struct nano_semaphore *s)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&s->lock, flags);
+ ret = __nano_semaphore_try_down(s);
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+
+/* only for internal use by nano semaphore */
+struct nano_semaphore_waiter {
+ struct list_head node;
+ struct task_struct *task;
+};
+
+/*
+ * Helper function to init a waiter with `task'
+ */
+static inline void nano_semaphore_waiter_init(struct nano_semaphore_waiter *w,
+ struct task_struct *task)
+{
+ INIT_LIST_HEAD(&w->node);
+ w->task = task;
+}
+
+/*
+ * Helper function to get the first pending waiter of nano semaphore
+ *
+ * Called with lock hold
+ */
+struct inline struct nano_semaphore_waiter *
+nano_semaphore_get_waiter(struct nano_semaphore *s)
+{
+ if (nano_semaphore_waiter_pending(s))
+ return list_first_entry(&s->chair,
+ struct nano_semaphore_waiter, node);
+ return NULL;
+}
+
+/*
+ * Helper function to sleep a while
+ *
+ * Called with lock not hold
+ */
+static inline int nano_semaphore_sleep(unsigned long nano_secs)
+{
+ int ret;
+
+ if (nano_secs) {
+ ktime_t ktime = ktime_set(0, nano_secs);
+
+ ret = schedule_hrtimeout(&ktime, HRTIMER_MODE_REL);
+ } else
+ ret = schedule_hrtimeout(NULL, HRTIMER_MODE_REL);
+
+ return ret;
+}
+
+/*
+ * nano_semaphore_down - acquire the nano semaphore
+ * @s: the nano semaphore to be acquired
+ * @nano_secs: the nano seconds to wait if necessary,
+ * could be zero if want to wait as long as possible.
+ *
+ * Returns >0 if acquired successfully, <=0 otherwise.
+ *
+ * Note unlike down() in semaphore, nano_semaphore_down is not looping until
+ * the nano semaphore is hold, but simply reports the result. And the callers
+ * could, if they like, loop in simple manner, for instance,
+ * while (1 != nano_semaphore_down(s, 800));
+ * do_home_work();
+ * nano_semaphore_up(s);
+ *
+ */
+static inline int
+nano_semaphore_down(struct nano_semaphore *s, unsigned long nano_secs)
+{
+ unsigned long flags;
+ int ret;
+ struct nano_semaphore_waiter w;
+
+ spin_lock_irqsave(&s->lock, flags);
+
+ ret = __nano_semaphore_try_down(s);
+ if (ret)
+ goto out;
+
+ nano_semaphore_waiter_init(&w, current);
+
+ __set_task_state(current, TASK_INTERRUPTIBLE);
+
+ list_add_tail(&w.node, &s->chair);
+
+ spin_unlock_irqrestore(&s->lock, flags);
+ ret = nano_semaphore_sleep(nano_secs);
+ spin_lock_irqsave(&s->lock, flags);
+
+ list_del(&w.node);
+
+ if (nano_semaphore_holder_match(s, current))
+ ret = 1;
+ out:
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+/**
+ * nano_semaphore_up - release the nano semaphore
+ * @s: the nano semaphore to release
+ *
+ * Note nano_semaphore_up() could be called even by tasks which have never
+ * called nano_semaphore_down(), but the tricky is not recommended.
+ */
+static inline void nano_semaphore_up(struct nano_semaphore *s)
+{
+ struct nano_semaphore_waiter *w;
+ unsigned long flags;
+
+ spin_lock_irqsave(&s->lock, flags);
+
+ if (! nano_semaphore_holder_match(s, current))
+ goto out;
+
+ w = nano_semaphore_get_waiter(s);
+ if (w) {
+ nano_semaphore_set_holder(s, w->task);
+ wake_up_process(w->task);
+ } else
+ nano_semaphore_set_holder(s, NULL);
+ out:
+ spin_unlock_irqrestore(&s->lock, flags);
+}
+
+#endif /* __NANO_SEMAPHORE_H_ */

2010-12-26 06:47:01

by Rakib Mullick

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, Dec 26, 2010 at 11:13 AM, Hillf Danton <[email protected]> wrote:
> Based upon high resolution timer and idea borrowed from semaphore,
> nano semaphore is created.
>
> Nano semaphore provides finer time resolution depending on system
> configuration and capabilities.
>
> Nano semaphore is not to replace semaphore, but used in application
> environments where nano seconds are required.
>
> Three methods, nano_semaphore_try_down, nano_semaphore_down and
> nano_semaphore_up are implemented in a header file, and there is no
> corresponding C file since nano semaphore is not complex.
>

Above description tells how its done, what it is but its not clear why
we should use it. Can I know why should we use it or its benefits?

thanks,
rakib

> Signed-off-by: Hillf Danton <[email protected]>
> ---
>
> --- a/include/linux/nano_semaphore.h ? ?1970-01-01 08:00:00.000000000 +0800
> +++ b/include/linux/nano_semaphore.h ? ?2010-12-26 12:38:36.000000000 +0800
> @@ -0,0 +1,263 @@
> +/*
> + * ?linux/nano_semaphore.h
> + *
> + * ?Definition and implementation of nano semaphore
> + *
> + * ?Nano semaphore provides finer time resolution depending on system
> + * ?configuration and capabilities.
> + *
> + * ?Nano semaphore could be used in parallel with semaphore.
> + *
> + * ?Started-by: Hillf Danton
> + *
> + * ?Credits:
> + * ? ? ideas are borrowed from semaphore and high resolution timer
> + *
> + * ?Distributed under the terms of GPL v2
> + */
> +
> +#ifndef __NANO_SEMAPHORE_H_
> +#define __NANO_SEMAPHORE_H_
> +
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +#include <linux/ktime.h>
> +#include <linux/sched.h>
> +#include <linux/hrtimer.h>
> +
> +/* must be initialized before use */
> +struct nano_semaphore {
> + ? ? ? struct list_head ? ? ? ?chair;
> + ? ? ? struct task_struct ? ? ?*holder;
> + ? ? ? spinlock_t ? ? ? ? ? ? ?lock;
> +};
> +
> +#define NANO_SEMAPHORE_INITIALIZER(name) ? ? ? ? ? ? ? \
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? .chair ?= LIST_HEAD_INIT((name).chair), ? ? ? ? \
> + ? ? ? .holder = NULL, ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? .lock ? = __SPIN_LOCK_UNLOCKED((name).lock), ? ?\
> +}
> +
> +
> +#define DEFINE_NANO_SEMAPHORE(name) ? ?\
> + ? ? ? struct nano_semaphore name = NANO_SEMAPHORE_INITIALIZER(name)
> +
> +
> +static inline void nano_semaphore_init(struct nano_semaphore *s)
> +{
> + ? ? ? INIT_LIST_HEAD(&s->chair);
> + ? ? ? s->holder = NULL;
> + ? ? ? spin_lock_init(&s->lock);
> +}
> +
> +/*
> + * Helper functions about nano semaphore
> + */
> +
> +/*
> + * Helper function to check, whether anyone is waiting the nano semaphore
> + *
> + * Called with lock hold
> + */
> +static inline int nano_semaphore_waiter_pending(struct nano_semaphore *s)
> +{
> + ? ? ? return !list_empty(&s->chair);
> +}
> +
> +/*
> + * Helper function to check, whether the nano semaphore is not hold by anyone
> + *
> + * Called with lock hold
> + */
> +static inline int nano_semaphore_holder_empty(struct nano_semaphore *s)
> +{
> + ? ? ? return !s->holder;
> +}
> +
> +/*
> + * Helper function to check, whether `task' is the holder of nano semaphore
> + *
> + * Called with lock hold
> + */
> +static inline int
> +nano_semaphore_holder_match(struct nano_semaphore *s, struct task_struct *task)
> +{
> + ? ? ? return s->holder == task;
> +}
> +
> +/*
> + * Helper function to set `task' to be the holder of nano semaphore
> + *
> + * Called with lock hold
> + */
> +static inline void
> +nano_semaphore_set_holder(struct nano_semaphore *s, struct task_struct *task)
> +{
> + ? ? ? s->holder = task;
> +}
> +
> +/*
> + * Helper function try to acquire the nano semaphore
> + *
> + * Returns 1 if acquired successfully, 0 otherwise.
> + *
> + * Called with lock hold
> + */
> +static inline int __nano_semaphore_try_down(struct nano_semaphore *s)
> +{
> + ? ? ? int ret;
> +
> + ? ? ? ret = !nano_semaphore_waiter_pending(s) &&
> + ? ? ? ? ? ? ? nano_semaphore_holder_empty(s);
> + ? ? ? if (ret)
> + ? ? ? ? ? ? ? nano_semaphore_set_holder(s, current);
> +
> + ? ? ? return ret;
> +}
> +
> +/*
> + * nano_semaphore_try_down - try to acquire the nano semaphore without waiting
> + * @s: the nano semaphore to be acquired
> + *
> + * Returns 1 if acquired successfully, 0 otherwise.
> + */
> +static inline int nano_semaphore_try_down(struct nano_semaphore *s)
> +{
> + ? ? ? unsigned long flags;
> + ? ? ? int ret;
> +
> + ? ? ? spin_lock_irqsave(&s->lock, flags);
> + ? ? ? ret = __nano_semaphore_try_down(s);
> + ? ? ? spin_unlock_irqrestore(&s->lock, flags);
> +
> + ? ? ? return ret;
> +}
> +
> +
> +/* only for internal use by nano semaphore */
> +struct nano_semaphore_waiter {
> + ? ? ? struct list_head ? ? ? ?node;
> + ? ? ? struct task_struct ? ? ?*task;
> +};
> +
> +/*
> + * Helper function to init a waiter with `task'
> + */
> +static inline void nano_semaphore_waiter_init(struct nano_semaphore_waiter *w,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct task_struct *task)
> +{
> + ? ? ? INIT_LIST_HEAD(&w->node);
> + ? ? ? w->task = task;
> +}
> +
> +/*
> + * Helper function to get the first pending waiter of nano semaphore
> + *
> + * Called with lock hold
> + */
> +struct inline struct nano_semaphore_waiter *
> +nano_semaphore_get_waiter(struct nano_semaphore *s)
> +{
> + ? ? ? if (nano_semaphore_waiter_pending(s))
> + ? ? ? ? ? ? ? return list_first_entry(&s->chair,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct nano_semaphore_waiter, node);
> + ? ? ? return NULL;
> +}
> +
> +/*
> + * Helper function to sleep a while
> + *
> + * Called with lock not hold
> + */
> +static inline int nano_semaphore_sleep(unsigned long nano_secs)
> +{
> + ? ? ? int ret;
> +
> + ? ? ? if (nano_secs) {
> + ? ? ? ? ? ? ? ktime_t ktime = ktime_set(0, nano_secs);
> +
> + ? ? ? ? ? ? ? ret = schedule_hrtimeout(&ktime, HRTIMER_MODE_REL);
> + ? ? ? } else
> + ? ? ? ? ? ? ? ret = schedule_hrtimeout(NULL, HRTIMER_MODE_REL);
> +
> + ? ? ? return ret;
> +}
> +
> +/*
> + * nano_semaphore_down - acquire the nano semaphore
> + * @s: the nano semaphore to be acquired
> + * @nano_secs: the nano seconds to wait if necessary,
> + * ? ? ? ? ? ? could be zero if want to wait as long as possible.
> + *
> + * Returns >0 if acquired successfully, <=0 otherwise.
> + *
> + * Note unlike down() in semaphore, nano_semaphore_down is not looping until
> + * the nano semaphore is hold, but simply reports the result. And the callers
> + * could, if they like, loop in simple manner, for instance,
> + * ? ? while (1 != nano_semaphore_down(s, 800));
> + * ? ? do_home_work();
> + * ? ? nano_semaphore_up(s);
> + *
> + */
> +static inline int
> +nano_semaphore_down(struct nano_semaphore *s, unsigned long nano_secs)
> +{
> + ? ? ? unsigned long flags;
> + ? ? ? int ret;
> + ? ? ? struct nano_semaphore_waiter w;
> +
> + ? ? ? spin_lock_irqsave(&s->lock, flags);
> +
> + ? ? ? ret = __nano_semaphore_try_down(s);
> + ? ? ? if (ret)
> + ? ? ? ? ? ? ? goto out;
> +
> + ? ? ? nano_semaphore_waiter_init(&w, current);
> +
> + ? ? ? __set_task_state(current, TASK_INTERRUPTIBLE);
> +
> + ? ? ? list_add_tail(&w.node, &s->chair);
> +
> + ? ? ? spin_unlock_irqrestore(&s->lock, flags);
> + ? ? ? ret = nano_semaphore_sleep(nano_secs);
> + ? ? ? spin_lock_irqsave(&s->lock, flags);
> +
> + ? ? ? list_del(&w.node);
> +
> + ? ? ? if (nano_semaphore_holder_match(s, current))
> + ? ? ? ? ? ? ? ret = 1;
> + out:
> + ? ? ? spin_unlock_irqrestore(&s->lock, flags);
> +
> + ? ? ? return ret;
> +}
> +
> +/**
> + * nano_semaphore_up - release the nano semaphore
> + * @s: the nano semaphore to release
> + *
> + * Note nano_semaphore_up() could be called even by tasks which have never
> + * called nano_semaphore_down(), but the tricky is not recommended.
> + */
> +static inline void nano_semaphore_up(struct nano_semaphore *s)
> +{
> + ? ? ? struct nano_semaphore_waiter *w;
> + ? ? ? unsigned long flags;
> +
> + ? ? ? spin_lock_irqsave(&s->lock, flags);
> +
> + ? ? ? if (! nano_semaphore_holder_match(s, current))
> + ? ? ? ? ? ? ? goto out;
> +
> + ? ? ? w = nano_semaphore_get_waiter(s);
> + ? ? ? if (w) {
> + ? ? ? ? ? ? ? nano_semaphore_set_holder(s, w->task);
> + ? ? ? ? ? ? ? wake_up_process(w->task);
> + ? ? ? } else
> + ? ? ? ? ? ? ? nano_semaphore_set_holder(s, NULL);
> + out:
> + ? ? ? spin_unlock_irqrestore(&s->lock, flags);
> +}
> +
> +#endif /* __NANO_SEMAPHORE_H_ */
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at ?http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at ?http://www.tux.org/lkml/
>

2010-12-26 07:04:24

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, Dec 26, 2010 at 2:46 PM, Rakib Mullick <[email protected]> wrote:
> On Sun, Dec 26, 2010 at 11:13 AM, Hillf Danton <[email protected]> wrote:
>> Based upon high resolution timer and idea borrowed from semaphore,
>> nano semaphore is created.
>>
>> Nano semaphore provides finer time resolution depending on system
>> configuration and capabilities.
>>
>> Nano semaphore is not to replace semaphore, but used in application
>> environments where nano seconds are required.
>>
>> Three methods, nano_semaphore_try_down, nano_semaphore_down and
>> nano_semaphore_up are implemented in a header file, and there is no
>> corresponding C file since nano semaphore is not complex.
>>
>
> Above description tells how its done, what it is but its not clear why
> we should use it. Can I know why should we use it or its benefits?
>

The outstanding benefit looks that nano semaphore could be used in
cases where callers want to wait not only more than one jiffy, but far
less than one jiffy also.

thanks
Hillf

>
> thanks,
> rakib

2010-12-26 09:08:05

by Rakib Mullick

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, Dec 26, 2010 at 1:04 PM, Hillf Danton <[email protected]> wrote:
> On Sun, Dec 26, 2010 at 2:46 PM, Rakib Mullick <[email protected]> wrote:
>> On Sun, Dec 26, 2010 at 11:13 AM, Hillf Danton <[email protected]> wrote:
>> Above description tells how its done, what it is but its not clear why
>> we should use it. Can I know why should we use it or its benefits?
>>
>
> The outstanding benefit looks that nano semaphore could be used in
> cases where callers want to wait not only more than one jiffy, but far
> less than one jiffy also.
>
But, how do we know that resources are going to be available within
one jiffy or far less than one jiffy? We we be deterministic?

thanks,
rakib

2010-12-26 12:05:59

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, Dec 26, 2010 at 5:08 PM, Rakib Mullick <[email protected]> wrote:
> On Sun, Dec 26, 2010 at 1:04 PM, Hillf Danton <[email protected]> wrote:
>> On Sun, Dec 26, 2010 at 2:46 PM, Rakib Mullick <[email protected]> wrote:
>>> On Sun, Dec 26, 2010 at 11:13 AM, Hillf Danton <[email protected]> wrote:
>>> Above description tells how its done, what it is but its not clear why
>>> we should use it. Can I know why should we use it or its benefits?
>>>
>>
>> The outstanding benefit looks that nano semaphore could be used in
>> cases where callers want to wait not only more than one jiffy, but far
>> less than one jiffy also.
>>
> But, how do we know that resources are going to be available within
> one jiffy or far less than one jiffy? We we be deterministic?
>
This is a really hard question.

Though I could not answer as fine as Ingo Molnar could, the
deterministic has to be faced by most contentions for resources in
kernel, simply because deadlock could occur in spin lock for instance.

On the other hand, this question explores a byproduct advantage, not
seriously considered before, of nano semaphore, that the caller could
learn that there is something out of track if she waited over 100
microseconds in 5 consequent steps, 20 microseconds a step, if she
think the resource should be available in 60 microseconds.

And in nano semaphore method is available for callers to select
waiting over one second, one millisecond, one microsecond, one
nanosecond, depending on what the underlying system could offer. After
waiting, the caller is free to determine what to do next if the
resource is not available.

Here is another sample, if taxi will not come in two minutes, I could
either give up shopping downtown, or wait another three minutes,
depending on what will happen two minutes later.

thanks
Hillf

2010-12-26 12:56:35

by Rakib Mullick

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, Dec 26, 2010 at 6:05 PM, Hillf Danton <[email protected]> wrote:
> On Sun, Dec 26, 2010 at 5:08 PM, Rakib Mullick <[email protected]> wrote:
>> On Sun, Dec 26, 2010 at 1:04 PM, Hillf Danton <[email protected]> wrote:
>>> On Sun, Dec 26, 2010 at 2:46 PM, Rakib Mullick <[email protected]> wrote:
>>>> On Sun, Dec 26, 2010 at 11:13 AM, Hillf Danton <[email protected]> wrote:
>>>> Above description tells how its done, what it is but its not clear why
>>>> we should use it. Can I know why should we use it or its benefits?
>>>>
>>>
>>> The outstanding benefit looks that nano semaphore could be used in
>>> cases where callers want to wait not only more than one jiffy, but far
>>> less than one jiffy also.
>>>
>> But, how do we know that resources are going to be available within
>> one jiffy or far less than one jiffy? We we be deterministic?
>>
> This is a really hard question.
>
> Though I could not answer as fine as Ingo Molnar could, the
> deterministic has to be faced by most contentions for resources in
> kernel, simply because deadlock could occur in spin lock for instance.
>
Yes, if its not handled carefully.

> On the other hand, this question explores a byproduct advantage, not
> seriously considered before, ?of nano semaphore, that the caller could
> learn that there is something out of track if she waited over 100
> microseconds in 5 consequent steps, 20 microseconds a step, if she
> think the resource should be available in 60 microseconds.
>
Why should caller think such a way, that resource will be available in
60 microseconds?

> And in nano semaphore method is available for callers to select
> waiting over one second, one millisecond, one microsecond, one
> nanosecond, depending on what the underlying system could offer. After
> waiting, the caller is free to determine what to do next if the
> resource is not available.
>
How do we know that, what the underlying system will offer? Its an
NP-type problem. We cannot determine what will happen on a systems
context. When a caller need resource, without that resource is it
possible to accomplish its job? If its not, then the ultimate way to
deal with it, is simply waiting until it gets the resource.

> Here is another sample, if taxi will not come in two minutes, I could
> either give up shopping downtown, or wait another three minutes,
> depending on what will happen two minutes later.

But what, if your mother told you not to come home until you done shopping? :)

thanks,
rakib

>
> thanks
> Hillf
>

2010-12-27 14:04:28

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, Dec 26, 2010 at 8:56 PM, Rakib Mullick <[email protected]> wrote:
> On Sun, Dec 26, 2010 at 6:05 PM, Hillf Danton <[email protected]> wrote:
>> On Sun, Dec 26, 2010 at 5:08 PM, Rakib Mullick <[email protected]> wrote:
>>> On Sun, Dec 26, 2010 at 1:04 PM, Hillf Danton <[email protected]> wrote:
>>>> On Sun, Dec 26, 2010 at 2:46 PM, Rakib Mullick <[email protected]> wrote:
>>>>> On Sun, Dec 26, 2010 at 11:13 AM, Hillf Danton <[email protected]> wrote:
>>>>> Above description tells how its done, what it is but its not clear why
>>>>> we should use it. Can I know why should we use it or its benefits?
>>>>>
>>>>
>>>> The outstanding benefit looks that nano semaphore could be used in
>>>> cases where callers want to wait not only more than one jiffy, but far
>>>> less than one jiffy also.
>>>>
>>> But, how do we know that resources are going to be available within
>>> one jiffy or far less than one jiffy? We we be deterministic?
>>>
>> This is a really hard question.
>>
>> Though I could not answer as fine as Ingo Molnar could, the
>> deterministic has to be faced by most contentions for resources in
>> kernel, simply because deadlock could occur in spin lock for instance.
>>
> Yes, if its not handled carefully.
>
>> On the other hand, this question explores a byproduct advantage, not
>> seriously considered before, of nano semaphore, that the caller could
>> learn that there is something out of track if she waited over 100
>> microseconds in 5 consequent steps, 20 microseconds a step, if she
>> think the resource should be available in 60 microseconds.
>>
> Why should caller think such a way, that resource will be available in
> 60 microseconds?
>

There are methods in kernel, say watchdog, to detect cases that are out
of track. The estimation about the availability of resource is utilized also
in dispatching commands to scsi disk by registering timer.

In nano semaphore, though the methods not implemented,
the waiter is able to work out which task is holding it over 60us in
out-of-track case.

>> And in nano semaphore method is available for callers to select
>> waiting over one second, one millisecond, one microsecond, one
>> nanosecond, depending on what the underlying system could offer. After
>> waiting, the caller is free to determine what to do next if the
>> resource is not available.
>>
> How do we know that, what the underlying system will offer? Its an

This is another hard question. And Ingo could say a few words about
the accuracy of hrtimer.

> NP-type problem. We cannot determine what will happen on a systems
> context. When a caller need resource, without that resource is it

This is not hard. When kmallocing 4k, if NULL returned, I will try half,
if still not available, simply return -ENOMEM.

> possible to accomplish its job? If its not, then the ultimate way to
> deal with it, is simply waiting until it gets the resource.
>

Another way looks to return -EBUSY directly, like aborting what is
dispatched and reset the bus then in scsi.

>> Here is another sample, if taxi will not come in two minutes, I could
>> either give up shopping downtown, or wait another three minutes,
>> depending on what will happen two minutes later.
>
> But what, if your mother told you not to come home until you done shopping? :)
>

I will info her every two minutes that taxi still not available, and
go home after 15 infos.

And lets return to nano semaphore, it is designed, unlike semaphore, not to loop
until released by its holder. Another difference is that the caller is free to
select the time period in nano seconds to wait.

Should anything else be added?

thanks,
Hillf

2010-12-27 20:09:00

by Randy Dunlap

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sun, 26 Dec 2010 13:13:42 +0800 Hillf Danton wrote:

> Based upon high resolution timer and idea borrowed from semaphore,
> nano semaphore is created.
>
> Nano semaphore provides finer time resolution depending on system
> configuration and capabilities.
>
> Nano semaphore is not to replace semaphore, but used in application
> environments where nano seconds are required.
>
> Three methods, nano_semaphore_try_down, nano_semaphore_down and
> nano_semaphore_up are implemented in a header file, and there is no
> corresponding C file since nano semaphore is not complex.
>
> Signed-off-by: Hillf Danton <[email protected]>
> ---
>
> --- a/include/linux/nano_semaphore.h 1970-01-01 08:00:00.000000000 +0800
> +++ b/include/linux/nano_semaphore.h 2010-12-26 12:38:36.000000000 +0800
> @@ -0,0 +1,263 @@
> +/*
> + * linux/nano_semaphore.h
> + *
> + * Definition and implementation of nano semaphore
> + *
> + * Nano semaphore provides finer time resolution depending on system
> + * configuration and capabilities.
> + *
> + * Nano semaphore could be used in parallel with semaphore.
> + *
> + * Started-by: Hillf Danton
> + *
> + * Credits:
> + * ideas are borrowed from semaphore and high resolution timer
> + *
> + * Distributed under the terms of GPL v2
> + */
> +
> +#ifndef __NANO_SEMAPHORE_H_
> +#define __NANO_SEMAPHORE_H_
> +
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +#include <linux/ktime.h>
> +#include <linux/sched.h>
> +#include <linux/hrtimer.h>
> +
> +/* must be initialized before use */
> +struct nano_semaphore {
> + struct list_head chair;
> + struct task_struct *holder;
> + spinlock_t lock;
> +};
> +
> +#define NANO_SEMAPHORE_INITIALIZER(name) \
> +{ \
> + .chair = LIST_HEAD_INIT((name).chair), \
> + .holder = NULL, \
> + .lock = __SPIN_LOCK_UNLOCKED((name).lock), \
> +}
> +
> +
> +#define DEFINE_NANO_SEMAPHORE(name) \
> + struct nano_semaphore name = NANO_SEMAPHORE_INITIALIZER(name)
> +
> +
> +static inline void nano_semaphore_init(struct nano_semaphore *s)
> +{
> + INIT_LIST_HEAD(&s->chair);
> + s->holder = NULL;
> + spin_lock_init(&s->lock);
> +}
> +
> +/*
> + * Helper functions about nano semaphore
> + */
> +
> +/*
> + * Helper function to check, whether anyone is waiting the nano semaphore
> + *
> + * Called with lock hold

with lock held

(change this in many places)

> + */
> +static inline int nano_semaphore_waiter_pending(struct nano_semaphore *s)
> +{
> + return !list_empty(&s->chair);
> +}
> +
> +/*
> + * Helper function to check, whether the nano semaphore is not hold by anyone
> + *
> + * Called with lock hold
> + */
> +static inline int nano_semaphore_holder_empty(struct nano_semaphore *s)
> +{
> + return !s->holder;
> +}
> +
> +/*
> + * Helper function to check, whether `task' is the holder of nano semaphore
> + *
> + * Called with lock hold
> + */
> +static inline int
> +nano_semaphore_holder_match(struct nano_semaphore *s, struct task_struct *task)
> +{
> + return s->holder == task;
> +}
> +
> +/*
> + * Helper function to set `task' to be the holder of nano semaphore
> + *
> + * Called with lock hold
> + */
> +static inline void
> +nano_semaphore_set_holder(struct nano_semaphore *s, struct task_struct *task)
> +{
> + s->holder = task;
> +}
> +
> +/*
> + * Helper function try to acquire the nano semaphore
> + *
> + * Returns 1 if acquired successfully, 0 otherwise.
> + *
> + * Called with lock hold
> + */
> +static inline int __nano_semaphore_try_down(struct nano_semaphore *s)
> +{
> + int ret;
> +
> + ret = !nano_semaphore_waiter_pending(s) &&
> + nano_semaphore_holder_empty(s);
> + if (ret)
> + nano_semaphore_set_holder(s, current);
> +
> + return ret;
> +}

Several of the functions above could easily be modified to use kernel-doc
notation for their documentation comments... please.

> +
> +/*

Please use
/**
here so that the kernel-doc can be used/expanded.

> + * nano_semaphore_try_down - try to acquire the nano semaphore without waiting
> + * @s: the nano semaphore to be acquired
> + *
> + * Returns 1 if acquired successfully, 0 otherwise.
> + */
> +static inline int nano_semaphore_try_down(struct nano_semaphore *s)
> +{
> + unsigned long flags;
> + int ret;
> +
> + spin_lock_irqsave(&s->lock, flags);
> + ret = __nano_semaphore_try_down(s);
> + spin_unlock_irqrestore(&s->lock, flags);
> +
> + return ret;
> +}
> +
> +
> +/* only for internal use by nano semaphore */
> +struct nano_semaphore_waiter {
> + struct list_head node;
> + struct task_struct *task;
> +};
> +
> +/*
> + * Helper function to init a waiter with `task'
> + */
> +static inline void nano_semaphore_waiter_init(struct nano_semaphore_waiter *w,
> + struct task_struct *task)
> +{
> + INIT_LIST_HEAD(&w->node);
> + w->task = task;
> +}
> +
> +/*
> + * Helper function to get the first pending waiter of nano semaphore
> + *
> + * Called with lock hold
> + */
> +struct inline struct nano_semaphore_waiter *
> +nano_semaphore_get_waiter(struct nano_semaphore *s)
> +{
> + if (nano_semaphore_waiter_pending(s))
> + return list_first_entry(&s->chair,
> + struct nano_semaphore_waiter, node);
> + return NULL;
> +}
> +
> +/*
> + * Helper function to sleep a while
> + *
> + * Called with lock not hold
> + */
> +static inline int nano_semaphore_sleep(unsigned long nano_secs)
> +{
> + int ret;
> +
> + if (nano_secs) {
> + ktime_t ktime = ktime_set(0, nano_secs);
> +
> + ret = schedule_hrtimeout(&ktime, HRTIMER_MODE_REL);
> + } else
> + ret = schedule_hrtimeout(NULL, HRTIMER_MODE_REL);
> +
> + return ret;
> +}
> +
> +/*

/**

> + * nano_semaphore_down - acquire the nano semaphore
> + * @s: the nano semaphore to be acquired
> + * @nano_secs: the nano seconds to wait if necessary,
> + * could be zero if want to wait as long as possible.
> + *
> + * Returns >0 if acquired successfully, <=0 otherwise.
> + *
> + * Note unlike down() in semaphore, nano_semaphore_down is not looping until
> + * the nano semaphore is hold, but simply reports the result. And the callers

is held,

> + * could, if they like, loop in simple manner, for instance,
> + * while (1 != nano_semaphore_down(s, 800));
> + * do_home_work();
> + * nano_semaphore_up(s);
> + *
> + */
> +static inline int
> +nano_semaphore_down(struct nano_semaphore *s, unsigned long nano_secs)
> +{
> + unsigned long flags;
> + int ret;
> + struct nano_semaphore_waiter w;
> +
> + spin_lock_irqsave(&s->lock, flags);
> +
> + ret = __nano_semaphore_try_down(s);
> + if (ret)
> + goto out;
> +
> + nano_semaphore_waiter_init(&w, current);
> +
> + __set_task_state(current, TASK_INTERRUPTIBLE);
> +
> + list_add_tail(&w.node, &s->chair);
> +
> + spin_unlock_irqrestore(&s->lock, flags);
> + ret = nano_semaphore_sleep(nano_secs);
> + spin_lock_irqsave(&s->lock, flags);
> +
> + list_del(&w.node);
> +
> + if (nano_semaphore_holder_match(s, current))
> + ret = 1;
> + out:
> + spin_unlock_irqrestore(&s->lock, flags);
> +
> + return ret;
> +}
> +
> +/**
> + * nano_semaphore_up - release the nano semaphore
> + * @s: the nano semaphore to release
> + *
> + * Note nano_semaphore_up() could be called even by tasks which have never
> + * called nano_semaphore_down(), but the tricky is not recommended.

s/tricky/trick/

> + */
> +static inline void nano_semaphore_up(struct nano_semaphore *s)
> +{
> + struct nano_semaphore_waiter *w;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&s->lock, flags);
> +
> + if (! nano_semaphore_holder_match(s, current))
> + goto out;
> +
> + w = nano_semaphore_get_waiter(s);
> + if (w) {
> + nano_semaphore_set_holder(s, w->task);
> + wake_up_process(w->task);
> + } else
> + nano_semaphore_set_holder(s, NULL);
> + out:
> + spin_unlock_irqrestore(&s->lock, flags);
> +}
> +
> +#endif /* __NANO_SEMAPHORE_H_ */
> --

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
desserts: http://www.xenotime.net/linux/recipes/

2010-12-27 21:15:44

by Arnd Bergmann

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Sunday 26 December 2010, Hillf Danton wrote:
>
> Based upon high resolution timer and idea borrowed from semaphore,
> nano semaphore is created.
>
> Nano semaphore provides finer time resolution depending on system
> configuration and capabilities.
>
> Nano semaphore is not to replace semaphore, but used in application
> environments where nano seconds are required.
>
> Three methods, nano_semaphore_try_down, nano_semaphore_down and
> nano_semaphore_up are implemented in a header file, and there is no
> corresponding C file since nano semaphore is not complex.
>
> Signed-off-by: Hillf Danton <[email protected]>

There are very few users of real semaphores today, and we're trying to
get rid of them. It's not clear what your requirements are, since you
have not posted any new users of this, but instead of adding more
locking primitives, I would recommend changing one of the existing
ones (mutex, semaphore, rwsem) to have nanosecond timeouts instead
of jiffies.

The easiest way would certainly be to change the three users of
down_timeout() to use nanoseconds.

Arnd

2010-12-28 13:13:22

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Tue, Dec 28, 2010 at 5:15 AM, Arnd Bergmann <[email protected]> wrote:
> On Sunday 26 December 2010, Hillf Danton wrote:
>>
>> Based upon high resolution timer and idea borrowed from semaphore,
>> nano semaphore is created.
>>
>> Nano semaphore provides finer time resolution depending on system
>> configuration and capabilities.
>>
>> Nano semaphore is not to replace semaphore, but used in application
>> environments where nano seconds are required.
>>
>> Three methods, nano_semaphore_try_down, nano_semaphore_down and
>> nano_semaphore_up are implemented in a header file, and there is no
>> corresponding C file since nano semaphore is not complex.
>>
>> Signed-off-by: Hillf Danton <[email protected]>
>
> There are very few users of real semaphores today, and we're trying to
> get rid of them. It's not clear what your requirements are, since you
> have not posted any new users of this, but instead of adding more

I want to add new way to do semaphore with finer time resolution,
without considering much about its potential usage.

And I learn from your comments there is more to do, thanks.

> locking primitives, I would recommend changing one of the existing
> ones (mutex, semaphore, rwsem) to have nanosecond timeouts instead
> of jiffies.
>
> The easiest way would certainly be to change the three users of
> down_timeout() to use nanoseconds.
>

I will try the three instances of down_timeout() soon.

Thanks
Hillf

2010-12-28 15:51:40

by Daniel Walker

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Mon, 2010-12-27 at 22:15 +0100, Arnd Bergmann wrote:
> On Sunday 26 December 2010, Hillf Danton wrote:
> >
> > Based upon high resolution timer and idea borrowed from semaphore,
> > nano semaphore is created.
> >
> > Nano semaphore provides finer time resolution depending on system
> > configuration and capabilities.
> >
> > Nano semaphore is not to replace semaphore, but used in application
> > environments where nano seconds are required.
> >
> > Three methods, nano_semaphore_try_down, nano_semaphore_down and
> > nano_semaphore_up are implemented in a header file, and there is no
> > corresponding C file since nano semaphore is not complex.
> >
> > Signed-off-by: Hillf Danton <[email protected]>
>
> There are very few users of real semaphores today, and we're trying to
> get rid of them. It's not clear what your requirements are, since you
> have not posted any new users of this, but instead of adding more
> locking primitives, I would recommend changing one of the existing
> ones (mutex, semaphore, rwsem) to have nanosecond timeouts instead
> of jiffies.
>
> The easiest way would certainly be to change the three users of
> down_timeout() to use nanoseconds.

We for sure don't want new semaphores, or new semaphore usage in the
kernel ..

It should also be noted that the rtmutex (kernel/rtmutex.c) already has
this capability. Although I don't think you can use an rtmutex from
inside the kernel.

If you really want this you should look into the rtmutex, and the
regular mutex API's .

Daniel

--
Sent by an consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum.

2010-12-29 11:48:09

by Arnd Bergmann

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Tuesday 28 December 2010 16:51:30 Daniel Walker wrote:
> We for sure don't want new semaphores, or new semaphore usage in the
> kernel ..

Yes. I once even tried unifying the semaphore and rwsem implementation,
but gave up on that for a number of reasons.

> It should also be noted that the rtmutex (kernel/rtmutex.c) already has
> this capability. Although I don't think you can use an rtmutex from
> inside the kernel.

I wasn't aware we had already grown another one ;-)

AFAICT, you can only use it inside of the kernel, but it's very
specific and I wouldn't recommend using it unless a regular mutex
cannot be used for some reason. The only user besides the futex
code seems to be the i2c layer at this moment.

> If you really want this you should look into the rtmutex, and the
> regular mutex API's .

If Hillf relies on counting semaphores, that won't work, but very
few such users exist in code outside of textbooks.

Arnd

2010-12-29 14:44:30

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Wed, Dec 29, 2010 at 7:47 PM, Arnd Bergmann <[email protected]> wrote:
> On Tuesday 28 December 2010 16:51:30 Daniel Walker wrote:
>> We for sure don't want new semaphores, or new semaphore usage in the
>> kernel ..

Would you please, Daniel, explain why there are so my file systems under
the fs directory? Would you think the ext file system is better than others?

And why there are in kernel spin lock, read/write lock, mutex, rw_mutex,
rtmutx, and semaphore, timer and hrtimer?

Could timer be removed tonight?

>
> Yes. I once even tried unifying the semaphore and rwsem implementation,
> but gave up on that for a number of reasons.

It looks hard to change rwsem, almost impossible, since it is based upon
asm, at least under the x86 dir.

>
>> It should also be noted that the rtmutex (kernel/rtmutex.c) already has
>> this capability. Although I don't think you can use an rtmutex from
>> inside the kernel.
>
> I wasn't aware we had already grown another one ;-)
>
> AFAICT, you can only use it inside of the kernel, but it's very
> specific and I wouldn't recommend using it unless a regular mutex
> cannot be used for some reason. The only user besides the futex
> code seems to be the i2c layer at this moment.
>
>> If you really want this you should look into the rtmutex, and the
>> regular mutex API's .
>

But greping "struct semaphore" include/linux and fs dirs may tell us
more about semaphore.

> If Hillf relies on counting semaphores, that won't work, but very
> few such users exist in code outside of textbooks.
>

Though capable in rtmutex, why mutex should no longer stay in Kernel?

However mutex could be changed based on hrtimer if needed for some reason.

Thanks
Hillf
---

--- a/kernel/mutex.c 2010-11-01 19:54:12.000000000 +0800
+++ b/kernel/mutex.c 2010-12-29 22:35:40.000000000 +0800
@@ -23,6 +23,7 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
+#include <linux/hrtimer.h>

/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -248,7 +249,11 @@ __mutex_lock_common(struct mutex *lock,
/* didnt get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
preempt_enable_no_resched();
- schedule();
+ do {
+ /* sleep 10,000 nanoseconds per loop */
+ ktime_t kt = ktime_set(0, 10000);
+ schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
+ } while (0);
preempt_disable();
spin_lock_mutex(&lock->wait_lock, flags);
}

2010-12-29 14:57:45

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

Based upon high resolution timer and idea borrowed from semaphore,
nano semaphore is created.

Nano semaphore provides finer time resolution depending on system
configuration and capabilities.

Nano semaphore is not to replace semaphore, but used in application
environments where nano seconds are required, rather than jiffy.

Three methods, nano_semaphore_try_down, nano_semaphore_down and
nano_semaphore_up are implemented in a header file, and there is no
corresponding C file since nano semaphore is not complex.

The benefit looks that, like timer and hrtimer, mutex and rtmutex,
file systems with and without journal, spin lock and rw_lock,
kernel is enriched with new element.

Signed-off-by: Hillf Danton <[email protected]>
---

--- a/include/linux/nano_semaphore.h 1970-01-01 08:00:00.000000000 +0800
+++ b/include/linux/nano_semaphore.h 2010-12-29 21:50:24.000000000 +0800
@@ -0,0 +1,300 @@
+/*
+ * linux/nano_semaphore.h
+ *
+ * Definition and implementation of nano semaphore
+ *
+ * Nano semaphore provides finer time resolution depending on system
+ * configuration and capabilities.
+ *
+ * Nano semaphore could be used in parallel with semaphore.
+ *
+ * Started-by: Hillf Danton
+ *
+ * Credits:
+ * ideas are borrowed from semaphore and high resolution timer
+ *
+ * Distributed under the terms of GPL v2
+ */
+
+#ifndef __NANO_SEMAPHORE_H_
+#define __NANO_SEMAPHORE_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+#include <linux/hrtimer.h>
+
+/**
+ * struct nano_semaphore - definition of nano semaphore
+ * @chair: the list head for pending waiters
+ * @holder: the current holder
+ * @lock: spin lock to serialize accesses in parallel
+ *
+ * must be initialized before usage.
+ */
+struct nano_semaphore {
+ struct list_head chair;
+ struct task_struct *holder;
+ spinlock_t lock;
+};
+
+#define NANO_SEMAPHORE_INITIALIZER(name) \
+{ \
+ .chair = LIST_HEAD_INIT((name).chair), \
+ .holder = NULL, \
+ .lock = __SPIN_LOCK_UNLOCKED((name).lock), \
+}
+
+
+#define DEFINE_NANO_SEMAPHORE(name) \
+ struct nano_semaphore name = NANO_SEMAPHORE_INITIALIZER(name)
+
+/**
+ * nano_semaphore_init() - initialize a nano semaphore
+ * @s: the nano semaphore to be initialized
+ *
+ * Nano semaphore should only be used after initialization.
+ */
+static inline void nano_semaphore_init(struct nano_semaphore *s)
+{
+ INIT_LIST_HEAD(&s->chair);
+ s->holder = NULL;
+ spin_lock_init(&s->lock);
+}
+
+/*
+ * Helper functions about nano semaphore
+ */
+
+
+/**
+ * nano_semaphore_waiter_pending() - check if waiter is pending
+ * @s: the nano semaphore to be checked
+ *
+ * Return 1 if pending, 0 otherwise. Called with lock held.
+ */
+static inline int nano_semaphore_waiter_pending(struct nano_semaphore *s)
+{
+ return !list_empty(&s->chair);
+}
+
+/**
+ * nano_semaphore_holder_empty() - check if holderer is empty
+ * @s: the nano semaphore to be checked
+ *
+ * Return 1 if empty, 0 otherwise. Called with lock held.
+ */
+static inline int nano_semaphore_holder_empty(struct nano_semaphore *s)
+{
+ return !s->holder;
+}
+
+/**
+ * nano_semaphore_holder_match() - check if holderer is the input task
+ * @s: the nano semaphore to be checked
+ * @task: the task under consideration
+ *
+ * Return 1 if match, 0 otherwise. Called with lock held.
+ */
+static inline int
+nano_semaphore_holder_match(struct nano_semaphore *s,
+ struct task_struct *task)
+{
+ return s->holder == task;
+}
+
+/**
+ * nano_semaphore_set_holder() - set holder to be the input task
+ * @s: the nano semaphore to be set
+ * @task: the task under consideration
+ *
+ * Called with lock held.
+ */
+static inline void
+nano_semaphore_set_holder(struct nano_semaphore *s,
+ struct task_struct *task)
+{
+ s->holder = task;
+}
+
+/**
+ * __nano_semaphore_try_down() - try to acquire nano semaphore
+ * @s: the nano semaphore to be acquired.
+ *
+ * Returns 1 if acquired successfully, 0 otherwise.
+ * Called with lock held.
+ */
+static inline int __nano_semaphore_try_down(struct nano_semaphore *s)
+{
+ int ret;
+
+ ret = nano_semaphore_holder_empty(s);
+ if (ret)
+ nano_semaphore_set_holder(s, current);
+
+ return ret;
+}
+
+/**
+ * nano_semaphore_try_down() - try to acquire nano semaphore without waiting
+ * @s: the nano semaphore to be acquired
+ *
+ * Returns 1 if acquired successfully, 0 otherwise.
+ */
+static inline int nano_semaphore_try_down(struct nano_semaphore *s)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&s->lock, flags);
+ ret = __nano_semaphore_try_down(s);
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+
+/**
+ * struct nano_semaphore_waiter - definition of nano semaphore waiter
+ * @node: the node added onto waiting list
+ * @task: the waiter task
+ *
+ * Only for internal use by nano semaphore
+ */
+struct nano_semaphore_waiter {
+ struct list_head node;
+ struct task_struct *task;
+};
+
+
+/**
+ * nano_semaphore_waiter_init() - init a waiter with the input task
+ * @w: the waiter to be initialized
+ * @task: the waiter task
+ *
+ * The @task could not be NULL.
+ */
+static inline void
+nano_semaphore_waiter_init(struct nano_semaphore_waiter *w,
+ struct task_struct *task)
+{
+ INIT_LIST_HEAD(&w->node);
+ w->task = task;
+}
+
+/**
+ * nano_semaphore_get_waiter() - get the first pending waiter
+ * @s: the nano semaphore to get waiter from
+ *
+ * Called with lock held.
+ */
+struct inline struct nano_semaphore_waiter *
+nano_semaphore_get_waiter(struct nano_semaphore *s)
+{
+ if (nano_semaphore_waiter_pending(s))
+ return list_first_entry(&s->chair,
+ struct nano_semaphore_waiter, node);
+ return NULL;
+}
+
+/**
+ * nano_semaphore_sleep() - sleep a while
+ * @nano_secs: the time period in nano seconds
+ *
+ * If want to sleep as long as possible, set @nano_secs to be zero.
+ *
+ * Returns 0 if time expired, <0 if interrupted. Called with lock not held.
+ */
+static inline int nano_semaphore_sleep(unsigned long nano_secs)
+{
+ int ret;
+
+ if (nano_secs) {
+ ktime_t ktime = ktime_set(0, nano_secs);
+
+ ret = schedule_hrtimeout(&ktime, HRTIMER_MODE_REL);
+ } else
+ ret = schedule_hrtimeout(NULL, HRTIMER_MODE_REL);
+
+ return ret;
+}
+
+/**
+ * nano_semaphore_down() - acquire nano semaphore
+ * @s: the nano semaphore to be acquired
+ * @nano_secs: the nano seconds to wait if necessary
+ *
+ * Note unlike down() in semaphore, nano_semaphore_down() is not looping
+ * until the nano semaphore is released, but simply reports the result.
+ * Callers could, if they like, loop in the following manner,
+ *
+ * while (1 != nano_semaphore_down(s, 800));
+ * do_home_work();
+ * nano_semaphore_up(s);
+ *
+ * for instance, but setting @nano_secs to zero is simpler.
+ *
+ * Returns >0 if acquired successfully, <=0 otherwise.
+ */
+static inline int
+nano_semaphore_down(struct nano_semaphore *s, unsigned long nano_secs)
+{
+ int ret;
+ unsigned long flags;
+ struct nano_semaphore_waiter w;
+
+ spin_lock_irqsave(&s->lock, flags);
+
+ ret = __nano_semaphore_try_down(s);
+ if (ret)
+ goto out;
+
+ nano_semaphore_waiter_init(&w, current);
+
+ __set_task_state(current, TASK_INTERRUPTIBLE);
+
+ /* waiters are managed in FIFO */
+ list_add_tail(&w.node, &s->chair);
+
+ spin_unlock_irqrestore(&s->lock, flags);
+ ret = nano_semaphore_sleep(nano_secs);
+ spin_lock_irqsave(&s->lock, flags);
+
+ list_del(&w.node);
+
+ if (nano_semaphore_holder_match(s, current))
+ ret = 1;
+ out:
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+/**
+ * nano_semaphore_up() - release nano semaphore
+ * @s: the nano semaphore to be released
+ *
+ * The caller task should be the current holder.
+ */
+static inline void nano_semaphore_up(struct nano_semaphore *s)
+{
+ struct nano_semaphore_waiter *w;
+ unsigned long flags;
+
+ spin_lock_irqsave(&s->lock, flags);
+
+ if (! nano_semaphore_holder_match(s, current))
+ goto out;
+
+ w = nano_semaphore_get_waiter(s);
+ if (w) {
+ nano_semaphore_set_holder(s, w->task);
+ wake_up_process(w->task);
+ } else
+ nano_semaphore_set_holder(s, NULL);
+ out:
+ spin_unlock_irqrestore(&s->lock, flags);
+}
+
+#endif /* __NANO_SEMAPHORE_H_ */

2010-12-29 14:58:16

by Daniel Walker

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Wed, 2010-12-29 at 22:42 +0800, Hillf Danton wrote:
> On Wed, Dec 29, 2010 at 7:47 PM, Arnd Bergmann <[email protected]> wrote:
> > On Tuesday 28 December 2010 16:51:30 Daniel Walker wrote:
> >> We for sure don't want new semaphores, or new semaphore usage in the
> >> kernel ..
>
> Would you please, Daniel, explain why there are so my file systems under
> the fs directory? Would you think the ext file system is better than others?
>
> And why there are in kernel spin lock, read/write lock, mutex, rw_mutex,
> rtmutx, and semaphore, timer and hrtimer?
>
> Could timer be removed tonight?

The problem with semaphores is that people use them in ways that are not
very nice, and not very efficient.. Since they are so flexible they can
be used in all sorts of ways, many of which are not clean. This is why,
if you read the kernel history, most semaphore have been removed from
the kernel and replaced with much nicer and cleaner mutexes.

Daniel

--
Sent by an consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum.

2010-12-29 15:03:17

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Wed, Dec 29, 2010 at 10:58 PM, Daniel Walker <[email protected]> wrote:
> On Wed, 2010-12-29 at 22:42 +0800, Hillf Danton wrote:
>> On Wed, Dec 29, 2010 at 7:47 PM, Arnd Bergmann <[email protected]> wrote:
>> > On Tuesday 28 December 2010 16:51:30 Daniel Walker wrote:
>> >> We for sure don't want new semaphores, or new semaphore usage in the
>> >> kernel ..
>>
>> Would you please, Daniel, explain why there are so my file systems under
>> the fs directory? Would you think the ext file system is better than others?
>>
>> And why there are in kernel spin lock, read/write lock, mutex, rw_mutex,
>> rtmutx, and semaphore, timer and hrtimer?
>>
>> Could timer be removed tonight?
>
> The problem with semaphores is that people use them in ways that are not
> very nice, and not very efficient.. Since they are so flexible they can
> be used in all sorts of ways, many of which are not clean. This is why,
> if you read the kernel history, most semaphore have been removed from
> the kernel and replaced with much nicer and cleaner mutexes.
>
> Daniel

Thanks for sharing the knowledge about rtmutx and semaphore.
Hillf

2010-12-29 19:16:48

by Arnd Bergmann

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Wednesday 29 December 2010 15:42:36 Hillf Danton wrote:
> On Wed, Dec 29, 2010 at 7:47 PM, Arnd Bergmann <[email protected]> wrote:
> > On Tuesday 28 December 2010 16:51:30 Daniel Walker wrote:
> >> We for sure don't want new semaphores, or new semaphore usage in the
> >> kernel ..
>
> Would you please, Daniel, explain why there are so my file systems under
> the fs directory? Would you think the ext file system is better than others?

Most of the file systems are for compatibility with other operating systems.
The ones that duplicate Linux-only functionality are there in order to provide
backwards-compatibility with existing users. We can't remove them in the
same way that we would remove code that is only used in the kernel itself.

> And why there are in kernel spin lock, read/write lock, mutex, rw_mutex,
> rtmutx, and semaphore

There are more of these, and they partly exist because it has been hard
to change all the old users. We did remove some others though.

> timer and hrtimer?
>
> Could timer be removed tonight?

These two are subtly different, timers are optimized for not expiring, while
hrtimer is optimized for actually expiring.

> > Yes. I once even tried unifying the semaphore and rwsem implementation,
> > but gave up on that for a number of reasons.
>
> It looks hard to change rwsem, almost impossible, since it is based upon
> asm, at least under the x86 dir.

That could be changed using the C implementation everywhere,
but there are other problems.

> >> It should also be noted that the rtmutex (kernel/rtmutex.c) already has
> >> this capability. Although I don't think you can use an rtmutex from
> >> inside the kernel.
> >
> > I wasn't aware we had already grown another one ;-)
> >
> > AFAICT, you can only use it inside of the kernel, but it's very
> > specific and I wouldn't recommend using it unless a regular mutex
> > cannot be used for some reason. The only user besides the futex
> > code seems to be the i2c layer at this moment.
> >
> >> If you really want this you should look into the rtmutex, and the
> >> regular mutex API's .
>
> But greping "struct semaphore" include/linux and fs dirs may tell us
> more about semaphore.

There are three classes of semaphore users today:

1. Those that initialize the semaphore to >1, guarding access to a
resource that has multiple users: acpi/osl, mthca, mlx4, megaraid,
comedi/vmk80xx, udlfb, usblc, usb-skeleton, blizzard, hwa742, and 9p.
2. Those that use the semaphore as some sort of completion, or a combination
of completion and mutex.
3. Those that can and should be converted to mutex: most of the staging
drivers, plus some more.

IMHO it would be nice to separate the first two classes in some way, so we
can make the counting semaphores stricter and apply the same rules as
mutexes and make the completion-like semaphores non-counting.

> > If Hillf relies on counting semaphores, that won't work, but very
> > few such users exist in code outside of textbooks.
> >
>
> Though capable in rtmutex, why mutex should no longer stay in Kernel?
>
> However mutex could be changed based on hrtimer if needed for some reason.

There is currently no mutex_lock_timeout(), so that would be meaningless.

> --- a/kernel/mutex.c 2010-11-01 19:54:12.000000000 +0800
> +++ b/kernel/mutex.c 2010-12-29 22:35:40.000000000 +0800
> @@ -23,6 +23,7 @@
> #include <linux/spinlock.h>
> #include <linux/interrupt.h>
> #include <linux/debug_locks.h>
> +#include <linux/hrtimer.h>
>
> /*
> * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> @@ -248,7 +249,11 @@ __mutex_lock_common(struct mutex *lock,
> /* didnt get the lock, go to sleep: */
> spin_unlock_mutex(&lock->wait_lock, flags);
> preempt_enable_no_resched();
> - schedule();
> + do {
> + /* sleep 10,000 nanoseconds per loop */
> + ktime_t kt = ktime_set(0, 10000);
> + schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
> + } while (0);
> preempt_disable();
> spin_lock_mutex(&lock->wait_lock, flags);
> }
>

Doing this would be extremely inefficient, because now the mutex wait
function would wake up very frequently instead of just once when the
mutex has been released by another thread.

Arnd

2010-12-30 14:21:07

by Hillf Danton

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Thu, Dec 30, 2010 at 3:16 AM, Arnd Bergmann <[email protected]> wrote:
> On Wednesday 29 December 2010 15:42:36 Hillf Danton wrote:
>> On Wed, Dec 29, 2010 at 7:47 PM, Arnd Bergmann <[email protected]> wrote:
>> > On Tuesday 28 December 2010 16:51:30 Daniel Walker wrote:
>> >> We for sure don't want new semaphores, or new semaphore usage in the
>> >> kernel ..
>>
>> Would you please, Daniel, explain why there are so my file systems under
>> the fs directory? Would you think the ext file system is better than others?
>
> Most of the file systems are for compatibility with other operating systems.
> The ones that duplicate Linux-only functionality are there in order to provide
> backwards-compatibility with existing users. We can't remove them in the
> same way that we would remove code that is only used in the kernel itself.
>
>> And why there are in kernel spin lock, read/write lock, mutex, rw_mutex,
>> rtmutx, and semaphore
>
> There are more of these, and they partly exist because it has been hard
> to change all the old users. We did remove some others though.
>
>> timer and hrtimer?
>>
>> Could timer be removed tonight?
>
> These two are subtly different, timers are optimized for not expiring, while
> hrtimer is optimized for actually expiring.
>
>> > Yes. I once even tried unifying the semaphore and rwsem implementation,
>> > but gave up on that for a number of reasons.
>>
>> It looks hard to change rwsem, almost impossible, since it is based upon
>> asm, at least under the x86 dir.
>
> That could be changed using the C implementation everywhere,
> but there are other problems.
>
>> >> It should also be noted that the rtmutex (kernel/rtmutex.c) already has
>> >> this capability. Although I don't think you can use an rtmutex from
>> >> inside the kernel.
>> >
>> > I wasn't aware we had already grown another one ;-)
>> >
>> > AFAICT, you can only use it inside of the kernel, but it's very
>> > specific and I wouldn't recommend using it unless a regular mutex
>> > cannot be used for some reason. The only user besides the futex
>> > code seems to be the i2c layer at this moment.
>> >
>> >> If you really want this you should look into the rtmutex, and the
>> >> regular mutex API's .
>>
>> But greping "struct semaphore" include/linux and fs dirs may tell us
>> more about semaphore.
>
> There are three classes of semaphore users today:
>
> 1. Those that initialize the semaphore to >1, guarding access to a
> resource that has multiple users: acpi/osl, mthca, mlx4, megaraid,
> comedi/vmk80xx, udlfb, usblc, usb-skeleton, blizzard, hwa742, and 9p.
> 2. Those that use the semaphore as some sort of completion, or a combination
> of completion and mutex.
> 3. Those that can and should be converted to mutex: most of the staging
> drivers, plus some more.
>

Great description and summary.

> IMHO it would be nice to separate the first two classes in some way, so we
> can make the counting semaphores stricter and apply the same rules as
> mutexes and make the completion-like semaphores non-counting.
>
>> > If Hillf relies on counting semaphores, that won't work, but very
>> > few such users exist in code outside of textbooks.
>> >
>>
>> Though capable in rtmutex, why mutex should no longer stay in Kernel?
>>
>> However mutex could be changed based on hrtimer if needed for some reason.
>
> There is currently no mutex_lock_timeout(), so that would be meaningless.
>
>> --- a/kernel/mutex.c 2010-11-01 19:54:12.000000000 +0800
>> +++ b/kernel/mutex.c 2010-12-29 22:35:40.000000000 +0800
>> @@ -23,6 +23,7 @@
>> #include <linux/spinlock.h>
>> #include <linux/interrupt.h>
>> #include <linux/debug_locks.h>
>> +#include <linux/hrtimer.h>
>>
>> /*
>> * In the DEBUG case we are using the "NULL fastpath" for mutexes,
>> @@ -248,7 +249,11 @@ __mutex_lock_common(struct mutex *lock,
>> /* didnt get the lock, go to sleep: */
>> spin_unlock_mutex(&lock->wait_lock, flags);
>> preempt_enable_no_resched();
>> - schedule();
>> + do {
>> + /* sleep 10,000 nanoseconds per loop */
>> + ktime_t kt = ktime_set(0, 10000);
>> + schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
>> + } while (0);
>> preempt_disable();
>> spin_lock_mutex(&lock->wait_lock, flags);
>> }
>>
>
> Doing this would be extremely inefficient, because now the mutex wait
> function would wake up very frequently instead of just once when the

Is it waked up less than jeffy?
If not, checking more frequently in the endless loop could help receive
signal, other than that is extremely meaningless, as the holder of
mutex is not ready to release.

As to timeout, it is another story, in which waiter is able to determine
how many jiffies or nanoseconds are acceptable if waiting is necessary.

Thanks
Hillf

> mutex has been released by another thread.
>
> Arnd
>

2010-12-30 15:56:09

by Arnd Bergmann

[permalink] [raw]

Subject: Re: [PATCH v0] add nano semaphore in kernel

On Thursday 30 December 2010, Hillf Danton wrote:
> >> /*
> >> * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> >> @@ -248,7 +249,11 @@ __mutex_lock_common(struct mutex *lock,
> >> /* didnt get the lock, go to sleep: */
> >> spin_unlock_mutex(&lock->wait_lock, flags);
> >> preempt_enable_no_resched();
> >> - schedule();
> >> + do {
> >> + /* sleep 10,000 nanoseconds per loop */
> >> + ktime_t kt = ktime_set(0, 10000);
> >> + schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
> >> + } while (0);
> >> preempt_disable();
> >> spin_lock_mutex(&lock->wait_lock, flags);
> >> }
> >>
> >
> > Doing this would be extremely inefficient, because now the mutex wait
> > function would wake up very frequently instead of just once when the
>
> Is it waked up less than jeffy?
> If not, checking more frequently in the endless loop could help receive
> signal, other than that is extremely meaningless, as the holder of
> mutex is not ready to release.

It should normally only wake up once, at exactly the time that the other
thread releases the mutex. What exactly happens depends on the relative
priorities of the two lock holders and whether all CPUs are busy already.

If there is nothing else to do and the blocking thread is made runnable
by giving up the mutex, it will run immediately. Even if we keep the
current thread running, the jiffie timer is rather meaningless and
does not directly impact when the new one wakes up.

> As to timeout, it is another story, in which waiter is able to determine
> how many jiffies or nanoseconds are acceptable if waiting is necessary.

Actually, the timeout is the time after which the waiter gives up and
does something else because it no longer expects to get the lock. This
is very rarely needed.

Arnd