Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S967352AbdDSQ7J (ORCPT ); Wed, 19 Apr 2017 12:59:09 -0400 Received: from mx0a-001b2d01.pphosted.com ([148.163.156.1]:60026 "EHLO mx0a-001b2d01.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S967312AbdDSQ6x (ORCPT ); Wed, 19 Apr 2017 12:58:53 -0400 From: "Paul E. McKenney" To: linux-kernel@vger.kernel.org Cc: mingo@kernel.org, jiangshanlai@gmail.com, dipankar@in.ibm.com, akpm@linux-foundation.org, mathieu.desnoyers@efficios.com, josh@joshtriplett.org, tglx@linutronix.de, peterz@infradead.org, rostedt@goodmis.org, dhowells@redhat.com, edumazet@google.com, fweisbec@gmail.com, oleg@redhat.com, bobby.prani@gmail.com, "Paul E. McKenney" Subject: [PATCH v3 tip/core/rcu 21/40] srcu: Move to state-based grace-period sequencing Date: Wed, 19 Apr 2017 09:58:18 -0700 X-Mailer: git-send-email 2.5.2 In-Reply-To: <20170419165805.GB10874@linux.vnet.ibm.com> References: <20170419165805.GB10874@linux.vnet.ibm.com> X-TM-AS-GCONF: 00 x-cbid: 17041916-0036-0000-0000-000001E5BC85 X-IBM-SpamModules-Scores: X-IBM-SpamModules-Versions: BY=3.00006939; HX=3.00000240; KW=3.00000007; PH=3.00000004; SC=3.00000208; SDB=6.00849682; UDB=6.00419583; IPR=6.00628316; BA=6.00005304; NDR=6.00000001; ZLA=6.00000005; ZF=6.00000009; ZB=6.00000000; ZP=6.00000000; ZH=6.00000000; ZU=6.00000002; MB=3.00015095; XFM=3.00000013; UTC=2017-04-19 16:58:44 X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 17041916-0037-0000-0000-00003FD5DAD8 Message-Id: <1492621117-13939-21-git-send-email-paulmck@linux.vnet.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2017-04-19_15:,, signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 spamscore=0 suspectscore=3 malwarescore=0 phishscore=0 adultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1703280000 definitions=main-1704190140 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8294 Lines: 222 The current SRCU grace-period processing might never reach the last portion of srcu_advance_batches(). This is OK given the current implementation, as the first portion, up to the try_check_zero() following the srcu_flip() is sufficient to drive grace periods forward. However, it has the unfortunate side-effect of making it impossible to determine when a given grace period has ended, and it will be necessary to efficiently trace ends of grace periods in order to efficiently handle per-CPU SRCU callback lists. This commit therefore adds states to the SRCU grace-period processing, so that the end of a given SRCU grace period is marked by the transition to the SRCU_STATE_DONE state. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 10 ++++- kernel/rcu/srcu.c | 111 ++++++++++++++++++++++++++++----------------------- 2 files changed, 69 insertions(+), 52 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a598cf3ac70c..f149a685896c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -48,7 +48,7 @@ struct srcu_struct { unsigned long completed; struct srcu_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->batch_queue, ->running */ - bool running; + int srcu_state; /* callbacks just queued */ struct rcu_batch batch_queue; /* callbacks try to do the first check_zero */ @@ -62,6 +62,12 @@ struct srcu_struct { #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; +/* Values for -> state variable. */ +#define SRCU_STATE_IDLE 0 +#define SRCU_STATE_SCAN1 1 +#define SRCU_STATE_SCAN2 2 +#define SRCU_STATE_DONE 3 + #ifdef CONFIG_DEBUG_LOCK_ALLOC int __init_srcu_struct(struct srcu_struct *sp, const char *name, @@ -89,7 +95,7 @@ void process_srcu(struct work_struct *work); .completed = -300, \ .per_cpu_ref = &name##_srcu_array, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .running = false, \ + .srcu_state = SRCU_STATE_IDLE, \ .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 821ecda873f2..70286f68f3a1 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -111,7 +111,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp) { sp->completed = 0; spin_lock_init(&sp->queue_lock); - sp->running = false; + sp->srcu_state = SRCU_STATE_IDLE; rcu_batch_init(&sp->batch_queue); rcu_batch_init(&sp->batch_check0); rcu_batch_init(&sp->batch_check1); @@ -270,7 +270,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp) if (WARN_ON(!rcu_all_batches_empty(sp))) return; /* Leakage unless caller handles error. */ flush_delayed_work(&sp->work); - if (WARN_ON(sp->running)) + if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE)) return; /* Caller forgot to stop doing call_srcu()? */ free_percpu(sp->per_cpu_ref); sp->per_cpu_ref = NULL; @@ -391,8 +391,8 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, spin_lock_irqsave(&sp->queue_lock, flags); smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ rcu_batch_queue(&sp->batch_queue, head); - if (!sp->running) { - sp->running = true; + if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) { + WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1); queue_delayed_work(system_power_efficient_wq, &sp->work, 0); } spin_unlock_irqrestore(&sp->queue_lock, flags); @@ -424,9 +424,9 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) head->func = wakeme_after_rcu; spin_lock_irq(&sp->queue_lock); smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ - if (!sp->running) { + if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) { /* steal the processing owner */ - sp->running = true; + WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1); rcu_batch_queue(&sp->batch_check0, head); spin_unlock_irq(&sp->queue_lock); /* give the processing owner to work_struct */ @@ -548,7 +548,9 @@ static void srcu_collect_new(struct srcu_struct *sp) */ static void srcu_advance_batches(struct srcu_struct *sp, int trycount) { - int idx = 1 ^ (sp->completed & 1); + int idx; + + WARN_ON_ONCE(sp->srcu_state == SRCU_STATE_IDLE); /* * Because readers might be delayed for an extended period after @@ -558,48 +560,56 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount) * invoking a callback. */ - if (rcu_batch_empty(&sp->batch_check0) && - rcu_batch_empty(&sp->batch_check1)) - return; /* no callbacks need to be advanced */ - - if (!try_check_zero(sp, idx, trycount)) - return; /* failed to advance, will try after SRCU_INTERVAL */ - - /* - * The callbacks in ->batch_check1 have already done with their - * first zero check and flip back when they were enqueued on - * ->batch_check0 in a previous invocation of srcu_advance_batches(). - * (Presumably try_check_zero() returned false during that - * invocation, leaving the callbacks stranded on ->batch_check1.) - * They are therefore ready to invoke, so move them to ->batch_done. - */ - rcu_batch_move(&sp->batch_done, &sp->batch_check1); - - if (rcu_batch_empty(&sp->batch_check0)) - return; /* no callbacks need to be advanced */ - srcu_flip(sp); - - /* - * The callbacks in ->batch_check0 just finished their - * first check zero and flip, so move them to ->batch_check1 - * for future checking on the other idx. - */ - rcu_batch_move(&sp->batch_check1, &sp->batch_check0); - - /* - * SRCU read-side critical sections are normally short, so check - * at least twice in quick succession after a flip. - */ - trycount = trycount < 2 ? 2 : trycount; - if (!try_check_zero(sp, idx^1, trycount)) - return; /* failed to advance, will try after SRCU_INTERVAL */ + if (sp->srcu_state == SRCU_STATE_DONE) + WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1); + + if (sp->srcu_state == SRCU_STATE_SCAN1) { + idx = 1 ^ (sp->completed & 1); + if (!try_check_zero(sp, idx, trycount)) + return; /* readers present, retry after SRCU_INTERVAL */ + + /* + * The callbacks in ->batch_check1 have already done + * with their first zero check and flip back when they were + * enqueued on ->batch_check0 in a previous invocation of + * srcu_advance_batches(). (Presumably try_check_zero() + * returned false during that invocation, leaving the + * callbacks stranded on ->batch_check1.) They are therefore + * ready to invoke, so move them to ->batch_done. + */ + rcu_batch_move(&sp->batch_done, &sp->batch_check1); + srcu_flip(sp); + + /* + * The callbacks in ->batch_check0 just finished their + * first check zero and flip, so move them to ->batch_check1 + * for future checking on the other idx. + */ + rcu_batch_move(&sp->batch_check1, &sp->batch_check0); + + WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2); + } - /* - * The callbacks in ->batch_check1 have now waited for all - * pre-existing readers using both idx values. They are therefore - * ready to invoke, so move them to ->batch_done. - */ - rcu_batch_move(&sp->batch_done, &sp->batch_check1); + if (sp->srcu_state == SRCU_STATE_SCAN2) { + + /* + * SRCU read-side critical sections are normally short, + * so check at least twice in quick succession after a flip. + */ + idx = 1 ^ (sp->completed & 1); + trycount = trycount < 2 ? 2 : trycount; + if (!try_check_zero(sp, idx, trycount)) + return; /* readers present, retry after SRCU_INTERVAL */ + + /* + * The callbacks in ->batch_check1 have now waited for + * all pre-existing readers using both idx values. They are + * therefore ready to invoke, so move them to ->batch_done. + */ + rcu_batch_move(&sp->batch_done, &sp->batch_check1); + + WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE); + } } /* @@ -633,8 +643,9 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) if (rcu_all_batches_empty(sp)) { spin_lock_irq(&sp->queue_lock); - if (rcu_all_batches_empty(sp)) { - sp->running = false; + if (rcu_all_batches_empty(sp) && + READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) { + WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE); pending = false; } spin_unlock_irq(&sp->queue_lock); -- 2.5.2