2014-11-28 09:40:07

by Lai Jiangshan

Subject: [RFC PATCH 0/2] tiny_rcu: simplify tiny_rcu

Hi, Paul

These two patches use a special feature of UP systems:
In UP, quiescent state == grace period.

For rcu_bh, rcu_process_callbacks() == a bh == QS == GP,
so we can record the rcu_bh QS and advance the GP (and callbacks) in
rcu_process_callbacks(). After doing so, rcu_bh_qs() is useless,
since its work is handled by rcu_process_callbacks().

For rcu_sched, context-switch = QS = GP, thus we can force a
context-switch when call_rcu_sched() happens in the idle task.
After doing so, rcu_idle/irq_enter/exit() are useless.

These patches remove the now-useless code to simplify tiny RCU.

We can change rcu_bh_qs() and rcu_idle/irq_enter/exit() into static inline
functions to further reduce the binary size once these two patches are accepted.
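
For illustration only (this is not part of the two patches), such stubs might
look roughly like the following; the placement in include/linux/rcutiny.h is
an assumption, not something these patches implement:

	static inline void rcu_bh_qs(void)
	{
		/* The rcu_bh QS is recorded in rcu_process_callbacks() instead. */
	}

	static inline void rcu_idle_enter(void)
	{
		/* Tiny RCU on UP no longer needs dyntick-idle bookkeeping. */
	}

	static inline void rcu_idle_exit(void)
	{
	}

	static inline void rcu_irq_enter(void)
	{
	}

	static inline void rcu_irq_exit(void)
	{
	}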

Thanks,
Lai

Cc: Josh Triplett <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>

Lai Jiangshan (2):
record rcu_bh quiescent state in RCU_SOFTIRQ
tiny_rcu: resched when call_rcu() on idle_task

kernel/rcu/rcu.h | 6 ++
kernel/rcu/tiny.c | 134 ++++++++--------------------------------------
kernel/rcu/tiny_plugin.h | 2 +-
kernel/rcu/tree.c | 6 --
4 files changed, 29 insertions(+), 119 deletions(-)

--
1.7.4.4


2014-11-28 09:40:35

by Lai Jiangshan

Subject: [RFC PATCH 2/2] tiny_rcu: resched when call_rcu_sched() on idle_task

For rcu_sched in UP, context-switch = QS = GP, thus we can force a
context-switch when call_rcu_sched() happens in the idle task.
After doing so, rcu_idle/irq_enter/exit() are useless, so we can simply
make these functions empty.

More importantly, this change does not change the functionality logically.
Note: the raise_softirq(RCU_SOFTIRQ) done by rcu_sched_qs() in rcu_idle_enter()
and in the outermost rcu_irq_exit() has to wake up ksoftirqd
(because in_interrupt() == 0 there).

Before this patch                     After this patch:
call_rcu_sched() in idle;             call_rcu_sched() in idle
                                        set resched
do other stuffs;                      do other stuffs
outmost rcu_irq_exit()                outmost rcu_irq_exit() (empty function)
  (or rcu_idle_enter())                 (or rcu_idle_enter(), also empty function)
start to resched.                       (see above)
  rcu_sched_qs()                      rcu_sched_qs()
    QS,and GP and advance cb            QS,and GP and advance cb
    wake up the ksoftirqd               wake up the ksoftirqd
      set resched
resched to ksoftirqd (or other)       resched to ksoftirqd (or other)

These two code paths are almost the same.

Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/rcu.h | 6 +++
kernel/rcu/tiny.c | 98 +++-------------------------------------------
kernel/rcu/tiny_plugin.h | 2 +-
kernel/rcu/tree.c | 6 ---
4 files changed, 13 insertions(+), 99 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 07bb02e..80adef7 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void);

void rcu_early_boot_tests(void);

+/*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index f8e19ac..004cfe1 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);

-static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-
#include "tiny_plugin.h"

-/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
-static void rcu_idle_enter_common(long long newval)
-{
- if (newval) {
- RCU_TRACE(trace_rcu_dyntick(TPS("--="),
- rcu_dynticks_nesting, newval));
- rcu_dynticks_nesting = newval;
- return;
- }
- RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
- rcu_dynticks_nesting, newval));
- if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
- struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
-
- RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
- rcu_dynticks_nesting, newval));
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
- rcu_sched_qs();
- barrier();
- rcu_dynticks_nesting = newval;
-}
-
/*
* Enter idle, which is an extended quiescent state if we have fully
- * entered that mode (i.e., if the new value of dynticks_nesting is zero).
+ * entered that mode.
*/
void rcu_idle_enter(void)
{
- unsigned long flags;
- long long newval;
-
- local_irq_save(flags);
- WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
- if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
- DYNTICK_TASK_NEST_VALUE)
- newval = 0;
- else
- newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
- rcu_idle_enter_common(newval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);

@@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
*/
void rcu_irq_exit(void)
{
- unsigned long flags;
- long long newval;
-
- local_irq_save(flags);
- newval = rcu_dynticks_nesting - 1;
- WARN_ON_ONCE(newval < 0);
- rcu_idle_enter_common(newval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_irq_exit);

-/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
-static void rcu_idle_exit_common(long long oldval)
-{
- if (oldval) {
- RCU_TRACE(trace_rcu_dyntick(TPS("++="),
- oldval, rcu_dynticks_nesting));
- return;
- }
- RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
- if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
- struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
-
- RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
- oldval, rcu_dynticks_nesting));
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
-}
-
/*
* Exit idle, so that we are no longer in an extended quiescent state.
*/
void rcu_idle_exit(void)
{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- WARN_ON_ONCE(rcu_dynticks_nesting < 0);
- if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
- rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
- else
- rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
- rcu_idle_exit_common(oldval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);

@@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
*/
void rcu_irq_enter(void)
{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- rcu_dynticks_nesting++;
- WARN_ON_ONCE(rcu_dynticks_nesting == 0);
- rcu_idle_exit_common(oldval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_irq_enter);

@@ -179,7 +89,7 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
*/
bool notrace __rcu_is_watching(void)
{
- return rcu_dynticks_nesting;
+ return true;
}
EXPORT_SYMBOL(__rcu_is_watching);

@@ -352,6 +262,10 @@ static void __call_rcu(struct rcu_head *head,
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_sched_ctrlblk);
+ if (unlikely(is_idle_task(current))) {
+ /* force scheduling for rcu_sched_qs() */
+ resched_cpu(0);
+ }
}
EXPORT_SYMBOL_GPL(call_rcu_sched);

diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 858c565..b5fc5e5 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -147,7 +147,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
js = ACCESS_ONCE(rcp->jiffies_stall);
if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
- rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
+ rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
jiffies - rcp->gp_start, rcp->qlen);
dump_stack();
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3f5dd16..b5ef579 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -940,12 +940,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
}

/*
- * This function really isn't for public consumption, but RCU is special in
- * that context switches can allow the state machine to make progress.
- */
-extern void resched_cpu(int cpu);
-
-/*
* Return true if the specified CPU has passed through a quiescent
* state by virtue of being in or having passed through an dynticks
* idle state since the last call to dyntick_save_progress_counter()
--
1.7.4.4

2014-11-28 09:40:05

by Lai Jiangshan

Subject: [RFC PATCH 1/2] record rcu_bh quiescent state in RCU_SOFTIRQ

For rcu_bh in UP, rcu_process_callbacks() == a bh == QS == GP,
so we can record the rcu_bh QS and advance the GP (and callbacks) in
rcu_process_callbacks().

After doing so, rcu_bh_qs() is useless, since its work is handled
by rcu_process_callbacks(). So we make it an empty function and
raise RCU_SOFTIRQ directly in call_rcu_bh().

Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/tiny.c | 38 +++++++++++++++++---------------------
1 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 805b6d5..f8e19ac 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
- rcu_sched_qs(); /* implies rcu_bh_inc() */
+ rcu_sched_qs();
barrier();
rcu_dynticks_nesting = newval;
}
@@ -186,49 +186,43 @@ EXPORT_SYMBOL(__rcu_is_watching);
#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */

/*
- * Helper function for rcu_sched_qs() and rcu_bh_qs().
+ * Helper function for rcu_sched_ctrlblk and rcu_bh_ctrlblk.
* Also irqs are disabled to avoid confusion due to interrupt handlers
* invoking call_rcu().
*/
static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
RCU_TRACE(reset_cpu_stall_ticks(rcp));
if (rcp->rcucblist != NULL &&
rcp->donetail != rcp->curtail) {
rcp->donetail = rcp->curtail;
+ local_irq_restore(flags);
return 1;
}
+ local_irq_restore(flags);

return 0;
}

/*
- * Record an rcu quiescent state. And an rcu_bh quiescent state while we
- * are at it, given that any rcu quiescent state is also an rcu_bh
- * quiescent state. Use "+" instead of "||" to defeat short circuiting.
+ * Record an rcu quiescent state.
*/
void rcu_sched_qs(void)
{
- unsigned long flags;
-
- local_irq_save(flags);
- if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
- rcu_qsctr_help(&rcu_bh_ctrlblk))
+ if (rcu_qsctr_help(&rcu_sched_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
- local_irq_restore(flags);
}

/*
* Record an rcu_bh quiescent state.
+ * Do nothing, since this quiescent state is temporarily useless until
+ * RCU_SOFTIRQ runs; RCU_SOFTIRQ will actually record the quiescent state.
*/
void rcu_bh_qs(void)
{
- unsigned long flags;
-
- local_irq_save(flags);
- if (rcu_qsctr_help(&rcu_bh_ctrlblk))
- raise_softirq(RCU_SOFTIRQ);
- local_irq_restore(flags);
}

/*
@@ -240,12 +234,10 @@ void rcu_bh_qs(void)
void rcu_check_callbacks(int user)
{
RCU_TRACE(check_cpu_stalls());
- if (user)
+ if (user) {
rcu_sched_qs();
- else if (!in_softirq())
- rcu_bh_qs();
- if (user)
rcu_note_voluntary_context_switch(current);
+ }
}

/*
@@ -303,6 +295,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
static void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
+
+ /* Here is a quiescent state for rcu_bh. */
+ rcu_qsctr_help(&rcu_bh_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
}

@@ -367,6 +362,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_bh_ctrlblk);
+ raise_softirq(RCU_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);

--
1.7.4.4

2014-12-01 16:46:52

by Paul E. McKenney

Subject: Re: [RFC PATCH 0/2] tiny_rcu: simplify tiny_rcu

On Fri, Nov 28, 2014 at 05:43:31PM +0800, Lai Jiangshan wrote:
> Hi, Paul
>
> These two patches use the special feture of the UP system:
> In UP, quiescent state == grace period.
>
> For rcu_bh, rcu_process_callbacks() == a bh == QS == GP
> so we can pass rcu_bh-QS and advance GP(and callbacks) in
> rcu_process_callbacks(). After doing so, rcu_bh_qs() is useless
> since its work is handled by rcu_process_callbacks().
>
> For rcu_sched, context-switch = QS = GP, thus we can force a
> context-switch when call_rcu_sched() is happened on idle_task.
> After doing so, rcu_idle/irq_enter/exit() are useless.
>
> These patches remove the useless code to simplify the tiny_rcu.
>
> We can change rcu_bh_qs() rcu_idle/irq_enter/exit() to static-inline-functions
> to reduce the binary size after these two patches accepted.
>
> Thanks,
> Lai

These look interesting and promising!

Could you please add the change in the tiny.o size to the commit logs?

Also, one potential performance problem with this patchset is that it is
moving the raise_softirq() from the scheduling-clock interrupt handler
(where it simply sets a bit in a per-CPU variable) to mainline code
(where it is often a much more expensive full wakeup of ksoftirqd).
My guess (and hope) is that this will be in the noise for most workloads,
but given that there is some chance of performance regressions, could
you please measure the performance difference on some reasonable
benchmark? The kernbench benchmark should be just fine for this purpose.
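
For reference, the cost difference comes from the softirq-raising path:
raised from interrupt context, raise_softirq() only needs to set the per-CPU
pending bit, but raised from task context it must wake ksoftirqd. A simplified
sketch of that logic, modeled loosely on kernel/softirq.c of that era (not the
exact code):

	void raise_softirq_irqoff(unsigned int nr)
	{
		__raise_softirq_irqoff(nr);	/* set the per-CPU pending bit */

		/*
		 * In hard- or soft-interrupt context the softirq will run on
		 * interrupt exit, so setting the bit is enough.  Otherwise
		 * nothing will notice the bit soon, so wake up ksoftirqd.
		 */
		if (!in_interrupt())
			wakeup_softirqd();
	}

	void raise_softirq(unsigned int nr)
	{
		unsigned long flags;

		local_irq_save(flags);
		raise_softirq_irqoff(nr);
		local_irq_restore(flags);
	}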

Thanx, Paul

> Cc: Josh Triplett <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Mathieu Desnoyers <[email protected]>
>
> Lai Jiangshan (2):
> record rcu_bh quiescent state in RCU_SOFTIRQ
> tiny_rcu: resched when call_rcu() on idle_task
>
> kernel/rcu/rcu.h | 6 ++
> kernel/rcu/tiny.c | 134 ++++++++--------------------------------------
> kernel/rcu/tiny_plugin.h | 2 +-
> kernel/rcu/tree.c | 6 --
> 4 files changed, 29 insertions(+), 119 deletions(-)
>
> --
> 1.7.4.4
>

2014-12-09 09:39:19

by Lai Jiangshan

Subject: Re: [RFC PATCH 0/2] tiny_rcu: simplify tiny_rcu

On 11/28/2014 11:42 PM, Paul E. McKenney wrote:
> On Fri, Nov 28, 2014 at 05:43:31PM +0800, Lai Jiangshan wrote:
>> Hi, Paul
>>
>> These two patches use the special feture of the UP system:
>> In UP, quiescent state == grace period.
>>
>> For rcu_bh, rcu_process_callbacks() == a bh == QS == GP
>> so we can pass rcu_bh-QS and advance GP(and callbacks) in
>> rcu_process_callbacks(). After doing so, rcu_bh_qs() is useless
>> since its work is handled by rcu_process_callbacks().
>>
>> For rcu_sched, context-switch = QS = GP, thus we can force a
>> context-switch when call_rcu_sched() is happened on idle_task.
>> After doing so, rcu_idle/irq_enter/exit() are useless.
>>
>> These patches remove the useless code to simplify the tiny_rcu.
>>
>> We can change rcu_bh_qs() rcu_idle/irq_enter/exit() to static-inline-functions
>> to reduce the binary size after these two patches accepted.
>>
>> Thanks,
>> Lai
>
> These look interesting and promising!
>
> Could you please add the change in the tiny.o size to the commit logs?
>
> Also, one potential performance problem with this patchset is that it is
> moving the RAISE_SOFTIRQ() from the scheduling-clock interrupt handler
> (where it simply sets a bit in a per-CPU variable) to mainline code
> (where it is often a much more expensive full wakeup of ksoftirqd).
> My guess (and hope) is that this will be in the noise for most workloads,
> but given that there is some chance of performance regressions, could
> you please measure the performance difference on some reasonable
> benchmark? The kernbench benchmark should be just fine for this purpose.


Hi, Paul

The kernbench tests showed no performance change. That is expected,
since kernbench doesn't hit any of the network code that uses RCU_BH.

But I will still drop patch 1, since I have only benchmarked it with kernbench.

Thanks,
Lai

>
> Thanx, Paul
>
>> Cc: Josh Triplett <[email protected]>
>> Cc: Steven Rostedt <[email protected]>
>> Cc: Mathieu Desnoyers <[email protected]>
>>
>> Lai Jiangshan (2):
>> record rcu_bh quiescent state in RCU_SOFTIRQ
>> tiny_rcu: resched when call_rcu() on idle_task
>>
>> kernel/rcu/rcu.h | 6 ++
>> kernel/rcu/tiny.c | 134 ++++++++--------------------------------------
>> kernel/rcu/tiny_plugin.h | 2 +-
>> kernel/rcu/tree.c | 6 --
>> 4 files changed, 29 insertions(+), 119 deletions(-)
>>
>> --
>> 1.7.4.4
>>
>
> .
>

2014-12-09 09:49:59

by Lai Jiangshan

Subject: [PATCH] tiny_rcu: directly force QS when call_rcu_[bh|sched]() on idle_task

For RCU in UP, context-switch = QS = GP, thus we can force a
context-switch when any call_rcu_[bh|sched]() happens in the idle task.
After doing so, rcu_idle/irq_enter/exit() are useless, so we can simply
make these functions empty.

More importantly, this change does not change the functionality logically.
Note: the raise_softirq(RCU_SOFTIRQ) done by rcu_sched_qs() in rcu_idle_enter()
and in the outermost rcu_irq_exit() has to wake up ksoftirqd
(because in_interrupt() == 0 there).

Before this patch                     After this patch:
call_rcu_sched() in idle;             call_rcu_sched() in idle
                                        set resched
do other stuffs;                      do other stuffs
outmost rcu_irq_exit()                outmost rcu_irq_exit() (empty function)
  (or rcu_idle_enter())                 (or rcu_idle_enter(), also empty function)
start to resched.                       (see above)
  rcu_sched_qs()                      rcu_sched_qs()
    QS,and GP and advance cb            QS,and GP and advance cb
    wake up the ksoftirqd               wake up the ksoftirqd
      set resched
resched to ksoftirqd (or other)       resched to ksoftirqd (or other)

These two code paths are almost the same.

Size change after the patch is applied:

size kernel/rcu/tiny-old.o kernel/rcu/tiny-patched.o
   text    data     bss     dec     hex filename
   3449     206       8    3663     e4f kernel/rcu/tiny-old.o
   2406     144       8    2558     9fe kernel/rcu/tiny-patched.o

Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/rcu.h | 6 +++
kernel/rcu/tiny.c | 99 +++------------------------------------------
kernel/rcu/tiny_plugin.h | 2 +-
kernel/rcu/tree.c | 6 ---
4 files changed, 14 insertions(+), 99 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 07bb02e..80adef7 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void);

void rcu_early_boot_tests(void);

+/*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 805b6d5..85287ec 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);

-static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-
#include "tiny_plugin.h"

-/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
-static void rcu_idle_enter_common(long long newval)
-{
- if (newval) {
- RCU_TRACE(trace_rcu_dyntick(TPS("--="),
- rcu_dynticks_nesting, newval));
- rcu_dynticks_nesting = newval;
- return;
- }
- RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
- rcu_dynticks_nesting, newval));
- if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
- struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
-
- RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
- rcu_dynticks_nesting, newval));
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
- rcu_sched_qs(); /* implies rcu_bh_inc() */
- barrier();
- rcu_dynticks_nesting = newval;
-}
-
/*
* Enter idle, which is an extended quiescent state if we have fully
- * entered that mode (i.e., if the new value of dynticks_nesting is zero).
+ * entered that mode.
*/
void rcu_idle_enter(void)
{
- unsigned long flags;
- long long newval;
-
- local_irq_save(flags);
- WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
- if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
- DYNTICK_TASK_NEST_VALUE)
- newval = 0;
- else
- newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
- rcu_idle_enter_common(newval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);

@@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
*/
void rcu_irq_exit(void)
{
- unsigned long flags;
- long long newval;
-
- local_irq_save(flags);
- newval = rcu_dynticks_nesting - 1;
- WARN_ON_ONCE(newval < 0);
- rcu_idle_enter_common(newval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_irq_exit);

-/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
-static void rcu_idle_exit_common(long long oldval)
-{
- if (oldval) {
- RCU_TRACE(trace_rcu_dyntick(TPS("++="),
- oldval, rcu_dynticks_nesting));
- return;
- }
- RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
- if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
- struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
-
- RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
- oldval, rcu_dynticks_nesting));
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
-}
-
/*
* Exit idle, so that we are no longer in an extended quiescent state.
*/
void rcu_idle_exit(void)
{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- WARN_ON_ONCE(rcu_dynticks_nesting < 0);
- if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
- rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
- else
- rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
- rcu_idle_exit_common(oldval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);

@@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
*/
void rcu_irq_enter(void)
{
- unsigned long flags;
- long long oldval;
-
- local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- rcu_dynticks_nesting++;
- WARN_ON_ONCE(rcu_dynticks_nesting == 0);
- rcu_idle_exit_common(oldval);
- local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_irq_enter);

@@ -179,7 +89,7 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
*/
bool notrace __rcu_is_watching(void)
{
- return rcu_dynticks_nesting;
+ return true;
}
EXPORT_SYMBOL(__rcu_is_watching);

@@ -347,6 +257,11 @@ static void __call_rcu(struct rcu_head *head,
rcp->curtail = &head->next;
RCU_TRACE(rcp->qlen++);
local_irq_restore(flags);
+
+ if (unlikely(is_idle_task(current))) {
+ /* force scheduling for rcu_sched_qs() */
+ resched_cpu(0);
+ }
}

/*
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 858c565..b5fc5e5 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -147,7 +147,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
js = ACCESS_ONCE(rcp->jiffies_stall);
if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
- rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
+ rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
jiffies - rcp->gp_start, rcp->qlen);
dump_stack();
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3f5dd16..b5ef579 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -940,12 +940,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
}

/*
- * This function really isn't for public consumption, but RCU is special in
- * that context switches can allow the state machine to make progress.
- */
-extern void resched_cpu(int cpu);
-
-/*
* Return true if the specified CPU has passed through a quiescent
* state by virtue of being in or having passed through an dynticks
* idle state since the last call to dyntick_save_progress_counter()
--
1.7.4.4

2014-12-09 17:03:11

by Paul E. McKenney

Subject: Re: [PATCH] tiny_rcu: directly force QS when call_rcu_[bh|sched]() on idle_task

On Tue, Dec 09, 2014 at 05:53:34PM +0800, Lai Jiangshan wrote:
> For RCU in UP, context-switch = QS = GP, thus we can force a
> context-switch when any call_rcu_[bh|sched]() is happened on idle_task.
> After doing so, rcu_idle/irq_enter/exit() are useless, so we can simply
> make these functions empty.
>
> More important, this change does not change the functionality logically.
> Note: raise_softirq(RCU_SOFTIRQ)/rcu_sched_qs() in rcu_idle_enter() and
> outmost rcu_irq_exit() will have to wake up the ksoftirqd
> (due to in_interrupt() == 0).
>
> Before this patch After this patch:
> call_rcu_sched() in idle; call_rcu_sched() in idle
> set resched
> do other stuffs; do other stuffs
> outmost rcu_irq_exit() outmost rcu_irq_exit() (empty function)
> (or rcu_idle_enter()) (or rcu_idle_enter(), also empty function)
> start to resched. (see above)
> rcu_sched_qs() rcu_sched_qs()
> QS,and GP and advance cb QS,and GP and advance cb
> wake up the ksoftirqd wake up the ksoftirqd
> set resched
> resched to ksoftirqd (or other) resched to ksoftirqd (or other)
>
> These two code patches are almost the same.
>
> Size changed after patched:
>
> size kernel/rcu/tiny-old.o kernel/rcu/tiny-patched.o
> text data bss dec hex filename
> 3449 206 8 3663 e4f kernel/rcu/tiny-old.o
> 2406 144 8 2558 9fe kernel/rcu/tiny-patched.o
>
> Signed-off-by: Lai Jiangshan <[email protected]>

Queued for testing, thank you!

Thanx, Paul

> ---
> kernel/rcu/rcu.h | 6 +++
> kernel/rcu/tiny.c | 99 +++------------------------------------------
> kernel/rcu/tiny_plugin.h | 2 +-
> kernel/rcu/tree.c | 6 ---
> 4 files changed, 14 insertions(+), 99 deletions(-)
>
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index 07bb02e..80adef7 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void);
>
> void rcu_early_boot_tests(void);
>
> +/*
> + * This function really isn't for public consumption, but RCU is special in
> + * that context switches can allow the state machine to make progress.
> + */
> +extern void resched_cpu(int cpu);
> +
> #endif /* __LINUX_RCU_H */
> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
> index 805b6d5..85287ec 100644
> --- a/kernel/rcu/tiny.c
> +++ b/kernel/rcu/tiny.c
> @@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head,
> void (*func)(struct rcu_head *rcu),
> struct rcu_ctrlblk *rcp);
>
> -static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
> -
> #include "tiny_plugin.h"
>
> -/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
> -static void rcu_idle_enter_common(long long newval)
> -{
> - if (newval) {
> - RCU_TRACE(trace_rcu_dyntick(TPS("--="),
> - rcu_dynticks_nesting, newval));
> - rcu_dynticks_nesting = newval;
> - return;
> - }
> - RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
> - rcu_dynticks_nesting, newval));
> - if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
> - struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
> -
> - RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
> - rcu_dynticks_nesting, newval));
> - ftrace_dump(DUMP_ALL);
> - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
> - current->pid, current->comm,
> - idle->pid, idle->comm); /* must be idle task! */
> - }
> - rcu_sched_qs(); /* implies rcu_bh_inc() */
> - barrier();
> - rcu_dynticks_nesting = newval;
> -}
> -
> /*
> * Enter idle, which is an extended quiescent state if we have fully
> - * entered that mode (i.e., if the new value of dynticks_nesting is zero).
> + * entered that mode.
> */
> void rcu_idle_enter(void)
> {
> - unsigned long flags;
> - long long newval;
> -
> - local_irq_save(flags);
> - WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
> - if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
> - DYNTICK_TASK_NEST_VALUE)
> - newval = 0;
> - else
> - newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
> - rcu_idle_enter_common(newval);
> - local_irq_restore(flags);
> }
> EXPORT_SYMBOL_GPL(rcu_idle_enter);
>
> @@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
> */
> void rcu_irq_exit(void)
> {
> - unsigned long flags;
> - long long newval;
> -
> - local_irq_save(flags);
> - newval = rcu_dynticks_nesting - 1;
> - WARN_ON_ONCE(newval < 0);
> - rcu_idle_enter_common(newval);
> - local_irq_restore(flags);
> }
> EXPORT_SYMBOL_GPL(rcu_irq_exit);
>
> -/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
> -static void rcu_idle_exit_common(long long oldval)
> -{
> - if (oldval) {
> - RCU_TRACE(trace_rcu_dyntick(TPS("++="),
> - oldval, rcu_dynticks_nesting));
> - return;
> - }
> - RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
> - if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
> - struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
> -
> - RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
> - oldval, rcu_dynticks_nesting));
> - ftrace_dump(DUMP_ALL);
> - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
> - current->pid, current->comm,
> - idle->pid, idle->comm); /* must be idle task! */
> - }
> -}
> -
> /*
> * Exit idle, so that we are no longer in an extended quiescent state.
> */
> void rcu_idle_exit(void)
> {
> - unsigned long flags;
> - long long oldval;
> -
> - local_irq_save(flags);
> - oldval = rcu_dynticks_nesting;
> - WARN_ON_ONCE(rcu_dynticks_nesting < 0);
> - if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
> - rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
> - else
> - rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
> - rcu_idle_exit_common(oldval);
> - local_irq_restore(flags);
> }
> EXPORT_SYMBOL_GPL(rcu_idle_exit);
>
> @@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
> */
> void rcu_irq_enter(void)
> {
> - unsigned long flags;
> - long long oldval;
> -
> - local_irq_save(flags);
> - oldval = rcu_dynticks_nesting;
> - rcu_dynticks_nesting++;
> - WARN_ON_ONCE(rcu_dynticks_nesting == 0);
> - rcu_idle_exit_common(oldval);
> - local_irq_restore(flags);
> }
> EXPORT_SYMBOL_GPL(rcu_irq_enter);
>
> @@ -179,7 +89,7 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
> */
> bool notrace __rcu_is_watching(void)
> {
> - return rcu_dynticks_nesting;
> + return true;
> }
> EXPORT_SYMBOL(__rcu_is_watching);
>
> @@ -347,6 +257,11 @@ static void __call_rcu(struct rcu_head *head,
> rcp->curtail = &head->next;
> RCU_TRACE(rcp->qlen++);
> local_irq_restore(flags);
> +
> + if (unlikely(is_idle_task(current))) {
> + /* force scheduling for rcu_sched_qs() */
> + resched_cpu(0);
> + }
> }
>
> /*
> diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
> index 858c565..b5fc5e5 100644
> --- a/kernel/rcu/tiny_plugin.h
> +++ b/kernel/rcu/tiny_plugin.h
> @@ -147,7 +147,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> js = ACCESS_ONCE(rcp->jiffies_stall);
> if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
> pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
> - rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
> + rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
> jiffies - rcp->gp_start, rcp->qlen);
> dump_stack();
> }
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 3f5dd16..b5ef579 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -940,12 +940,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
> }
>
> /*
> - * This function really isn't for public consumption, but RCU is special in
> - * that context switches can allow the state machine to make progress.
> - */
> -extern void resched_cpu(int cpu);
> -
> -/*
> * Return true if the specified CPU has passed through a quiescent
> * state by virtue of being in or having passed through an dynticks
> * idle state since the last call to dyntick_save_progress_counter()
> --
> 1.7.4.4
>