Currently, when we first find that the expedited grace period has not yet
ended and a timeout has occurred, we set the tick dependency for CPUs that
have not yet reported a quiescent state in the rcu_node structure's ->expmask.
However, in order to eliminate races between setting and clearing the tick
dependency, setting a CPU's tick dependency requires holding the rcu_node
structure's ->lock.
This commit moves the tick-dependency setting into rcu_exp_handler(): the
tick dependency is set when we fail to report a quiescent state, and it is
cleared in rcu_report_exp_rdp(). [per a suggestion from Frederic Weisbecker]
Signed-off-by: Zqiang <[email protected]>
---
kernel/rcu/tree_exp.h | 42 +++++++++++++++++-------------------------
1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 7cc4856da081..02605048830c 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -586,39 +586,19 @@ static bool synchronize_rcu_expedited_wait_once(long tlimit)
static void synchronize_rcu_expedited_wait(void)
{
int cpu;
- unsigned long j;
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
int ndetected;
- struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root();
- unsigned long flags;
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
jiffies_stall = rcu_exp_jiffies_till_stall_check();
jiffies_start = jiffies;
- if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
- if (synchronize_rcu_expedited_wait_once(1))
- return;
- rcu_for_each_leaf_node(rnp) {
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- mask = READ_ONCE(rnp->expmask);
- for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
- rdp = per_cpu_ptr(&rcu_data, cpu);
- if (rdp->rcu_forced_tick_exp)
- continue;
- rdp->rcu_forced_tick_exp = true;
- if (cpu_online(cpu))
- tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
- }
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- }
- j = READ_ONCE(jiffies_till_first_fqs);
- if (synchronize_rcu_expedited_wait_once(j + HZ))
- return;
- }
+
+ if (synchronize_rcu_expedited_wait_once(1))
+ return;
for (;;) {
if (synchronize_rcu_expedited_wait_once(jiffies_stall))
@@ -653,8 +633,6 @@ static void synchronize_rcu_expedited_wait(void)
if (ndetected) {
pr_err("blocking rcu_node structures (internal RCU debug):");
rcu_for_each_node_breadth_first(rnp) {
- if (rnp == rnp_root)
- continue; /* printed unconditionally */
if (sync_rcu_exp_done_unlocked(rnp))
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
@@ -729,6 +707,17 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
#ifdef CONFIG_PREEMPT_RCU
+static void rcu_exp_set_tickdep(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended() &&
+ cpu_online(cpu)) {
+ rdp->rcu_forced_tick_exp = true;
+ tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
+ }
+}
+
/*
* Remote handler for smp_call_function_single(). If there is an
* RCU read-side critical section in effect, request that the
@@ -743,6 +732,7 @@ static void rcu_exp_handler(void *unused)
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
struct task_struct *t = current;
+ int cpu = raw_smp_processor_id();
/*
* First, the common case of not being in an RCU read-side
@@ -757,6 +747,7 @@ static void rcu_exp_handler(void *unused)
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
set_tsk_need_resched(t);
set_preempt_need_resched();
+ rcu_exp_set_tickdep(cpu);
}
return;
}
@@ -778,6 +769,7 @@ static void rcu_exp_handler(void *unused)
if (rnp->expmask & rdp->grpmask) {
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
t->rcu_read_unlock_special.b.exp_hint = true;
+ rcu_exp_set_tickdep(cpu);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
--
2.25.1
On Wed, Jan 04, 2023 at 07:47:22PM +0800, Zqiang wrote:
> @@ -586,39 +586,19 @@ static bool synchronize_rcu_expedited_wait_once(long tlimit)
> static void synchronize_rcu_expedited_wait(void)
> {
> int cpu;
> - unsigned long j;
> unsigned long jiffies_stall;
> unsigned long jiffies_start;
> unsigned long mask;
> int ndetected;
> - struct rcu_data *rdp;
> struct rcu_node *rnp;
> struct rcu_node *rnp_root = rcu_get_root();
> - unsigned long flags;
>
> trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
> jiffies_stall = rcu_exp_jiffies_till_stall_check();
> jiffies_start = jiffies;
> - if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
> - if (synchronize_rcu_expedited_wait_once(1))
> - return;
> - rcu_for_each_leaf_node(rnp) {
> - raw_spin_lock_irqsave_rcu_node(rnp, flags);
> - mask = READ_ONCE(rnp->expmask);
> - for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
> - rdp = per_cpu_ptr(&rcu_data, cpu);
> - if (rdp->rcu_forced_tick_exp)
> - continue;
> - rdp->rcu_forced_tick_exp = true;
> - if (cpu_online(cpu))
> - tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
> - }
> - raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> - }
> - j = READ_ONCE(jiffies_till_first_fqs);
> - if (synchronize_rcu_expedited_wait_once(j + HZ))
> - return;
> - }
> +
> + if (synchronize_rcu_expedited_wait_once(1))
> + return;
Do we still need this? This is immediately followed by the same call with
a higher time limit.
>
> for (;;) {
> if (synchronize_rcu_expedited_wait_once(jiffies_stall))
> @@ -653,8 +633,6 @@ static void synchronize_rcu_expedited_wait(void)
> if (ndetected) {
> pr_err("blocking rcu_node structures (internal RCU debug):");
> rcu_for_each_node_breadth_first(rnp) {
> - if (rnp == rnp_root)
> - continue; /* printed unconditionally */
Is that chunk from another patch?
> if (sync_rcu_exp_done_unlocked(rnp))
> continue;
> pr_cont(" l=%u:%d-%d:%#lx/%c",
> @@ -729,6 +707,17 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
>
> #ifdef CONFIG_PREEMPT_RCU
>
> +static void rcu_exp_set_tickdep(int cpu)
> +{
> + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
This is only used locally so you can use this_cpu_ptr() or even
better you can pass the rdp from rcu_exp_handler()
> +
> + if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended() &&
You can directly test tick_nohz_full_cpu(smp_processor_id()); this has the
advantage of only evaluating smp_processor_id() if nohz_full is currently
running (thanks to a static key). It's a micro-optimization, but still...
> + cpu_online(cpu)) {
Right good point, this could happen if rcu_exp_handler() is called from
smpcfd_dying_cpu(). In which case we can't requeue an IPI or an IRQ work.
Which is not a problem since rcu_report_dead() -> rcu_preempt_deferred_qs()
then does the QS report. Can you add a comment about that?
> + rdp->rcu_forced_tick_exp = true;
> + tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
> + }
> +}
> +
> /*
> * Remote handler for smp_call_function_single(). If there is an
> * RCU read-side critical section in effect, request that the
> @@ -743,6 +732,7 @@ static void rcu_exp_handler(void *unused)
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> struct rcu_node *rnp = rdp->mynode;
> struct task_struct *t = current;
> + int cpu = raw_smp_processor_id();
And then no need to fetch smp_processor_id() here.
Thanks.