Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757713AbdDQXvz (ORCPT ); Mon, 17 Apr 2017 19:51:55 -0400 Received: from mx0a-001b2d01.pphosted.com ([148.163.156.1]:59984 "EHLO mx0a-001b2d01.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755684AbdDQXpk (ORCPT ); Mon, 17 Apr 2017 19:45:40 -0400 From: "Paul E. McKenney" To: linux-kernel@vger.kernel.org Cc: mingo@kernel.org, jiangshanlai@gmail.com, dipankar@in.ibm.com, akpm@linux-foundation.org, mathieu.desnoyers@efficios.com, josh@joshtriplett.org, tglx@linutronix.de, peterz@infradead.org, rostedt@goodmis.org, dhowells@redhat.com, edumazet@google.com, fweisbec@gmail.com, oleg@redhat.com, bobby.prani@gmail.com, "Paul E. McKenney" Subject: [PATCH v2 tip/core/rcu 32/39] srcu: Merge ->srcu_state into ->srcu_gp_seq Date: Mon, 17 Apr 2017 16:45:19 -0700 X-Mailer: git-send-email 2.5.2 In-Reply-To: <20170417234452.GB19013@linux.vnet.ibm.com> References: <20170417234452.GB19013@linux.vnet.ibm.com> X-TM-AS-GCONF: 00 x-cbid: 17041723-0008-0000-0000-000001F8D1F6 X-IBM-SpamModules-Scores: X-IBM-SpamModules-Versions: BY=3.00006935; HX=3.00000240; KW=3.00000007; PH=3.00000004; SC=3.00000208; SDB=6.00848869; UDB=6.00419100; IPR=6.00627509; BA=6.00005292; NDR=6.00000001; ZLA=6.00000005; ZF=6.00000009; ZB=6.00000000; ZP=6.00000000; ZH=6.00000000; ZU=6.00000002; MB=3.00015073; XFM=3.00000013; UTC=2017-04-17 23:45:38 X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 17041723-0009-0000-0000-000034B7A303 Message-Id: <1492472726-3841-32-git-send-email-paulmck@linux.vnet.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2017-04-17_20:,, signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 spamscore=0 suspectscore=4 malwarescore=0 phishscore=0 adultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1702020001 definitions=main-1704170205 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7952 Lines: 217 Updating ->srcu_state and ->srcu_gp_seq will lead to extremely complex race conditions given multiple callback queues, so this commit takes advantage of the two-bit state now available in rcu_seq counters to store the state in the bottom two bits of ->srcu_gp_seq. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 5 +---- kernel/rcu/rcu.h | 10 ++++++++++ kernel/rcu/srcu.c | 55 +++++++++++++++++++++++++++++++++------------------- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ad154a7bc114..e7dbc01b61a1 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,8 +43,7 @@ struct srcu_struct { unsigned long completed; unsigned long srcu_gp_seq; struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */ - int srcu_state; + spinlock_t queue_lock; /* protect ->srcu_cblist */ struct rcu_segcblist srcu_cblist; struct delayed_work work; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -56,7 +55,6 @@ struct srcu_struct { #define SRCU_STATE_IDLE 0 #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define SRCU_STATE_DONE 3 #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -85,7 +83,6 @@ void process_srcu(struct work_struct *work); .completed = -300, \ .per_cpu_ref = &name##_srcu_array, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .srcu_state = SRCU_STATE_IDLE, \ .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ __SRCU_DEP_MAP_INIT(name) \ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 87a0ac95b551..73e16ec4054b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -82,6 +82,16 @@ static inline int rcu_seq_state(unsigned long s) return s & RCU_SEQ_STATE_MASK; } +/* + * Set the state portion of the pointed-to sequence number. + * The caller is responsible for preventing conflicting updates. + */ +static inline void rcu_seq_set_state(unsigned long *sp, int newstate) +{ + WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK); + WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate); +} + /* Adjust sequence number for start of update-side operation. */ static inline void rcu_seq_start(unsigned long *sp) { diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 5aeeaecfb673..97aec5d7b316 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -44,7 +44,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp) sp->completed = 0; sp->srcu_gp_seq = 0; spin_lock_init(&sp->queue_lock); - sp->srcu_state = SRCU_STATE_IDLE; rcu_segcblist_init(&sp->srcu_cblist); INIT_DELAYED_WORK(&sp->work, process_srcu); sp->per_cpu_ref = alloc_percpu(struct srcu_array); @@ -180,6 +179,9 @@ static bool srcu_readers_active(struct srcu_struct *sp) return sum; } +#define SRCU_CALLBACK_BATCH 10 +#define SRCU_INTERVAL 1 + /** * cleanup_srcu_struct - deconstruct a sleep-RCU structure * @sp: structure to clean up. @@ -194,8 +196,10 @@ void cleanup_srcu_struct(struct srcu_struct *sp) if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist))) return; /* Leakage unless caller handles error. */ flush_delayed_work(&sp->work); - if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE)) + if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE)) { + pr_info("cleanup_srcu_struct: Active srcu_struct %lu CBs %c state: %d\n", rcu_segcblist_n_cbs(&sp->srcu_cblist), ".E"[rcu_segcblist_empty(&sp->srcu_cblist)], rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ + } free_percpu(sp->per_cpu_ref); sp->per_cpu_ref = NULL; } @@ -247,10 +251,13 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); */ static void srcu_gp_start(struct srcu_struct *sp) { + int state; + rcu_segcblist_accelerate(&sp->srcu_cblist, rcu_seq_snap(&sp->srcu_gp_seq)); - WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1); rcu_seq_start(&sp->srcu_gp_seq); + state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } /* @@ -294,7 +301,6 @@ static void srcu_flip(struct srcu_struct *sp) static void srcu_gp_end(struct srcu_struct *sp) { rcu_seq_end(&sp->srcu_gp_seq); - WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE); spin_lock_irq(&sp->queue_lock); rcu_segcblist_advance(&sp->srcu_cblist, @@ -339,7 +345,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, spin_lock_irqsave(&sp->queue_lock, flags); smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); - if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) { + if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) { srcu_gp_start(sp); queue_delayed_work(system_power_efficient_wq, &sp->work, 0); } @@ -372,7 +378,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) head->func = wakeme_after_rcu; spin_lock_irq(&sp->queue_lock); smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ - if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) { + if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_IDLE) { /* steal the processing owner */ rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); srcu_gp_start(sp); @@ -474,9 +480,6 @@ unsigned long srcu_batches_completed(struct srcu_struct *sp) } EXPORT_SYMBOL_GPL(srcu_batches_completed); -#define SRCU_CALLBACK_BATCH 10 -#define SRCU_INTERVAL 1 - /* * Core SRCU state machine. Advance callbacks from ->batch_check0 to * ->batch_check1 and then to ->batch_done as readers drain. @@ -485,28 +488,40 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount) { int idx; - WARN_ON_ONCE(sp->srcu_state == SRCU_STATE_IDLE); - /* * Because readers might be delayed for an extended period after * fetching ->completed for their index, at any point in time there * might well be readers using both idx=0 and idx=1. We therefore * need to wait for readers to clear from both index values before * invoking a callback. + * + * The load-acquire ensures that we see the accesses performed + * by the prior grace period. */ + idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + if (idx == SRCU_STATE_IDLE) { + spin_lock_irq(&sp->queue_lock); + if (rcu_segcblist_empty(&sp->srcu_cblist)) { + spin_unlock_irq(&sp->queue_lock); + return; + } + idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + if (idx == SRCU_STATE_IDLE) + srcu_gp_start(sp); + spin_unlock_irq(&sp->queue_lock); + if (idx != SRCU_STATE_IDLE) + return; /* Someone else started the grace period. */ + } - if (sp->srcu_state == SRCU_STATE_DONE) - srcu_gp_start(sp); - - if (sp->srcu_state == SRCU_STATE_SCAN1) { + if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { idx = 1 ^ (sp->completed & 1); if (!try_check_zero(sp, idx, trycount)) return; /* readers present, retry after SRCU_INTERVAL */ srcu_flip(sp); - WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2); + rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); } - if (sp->srcu_state == SRCU_STATE_SCAN2) { + if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { /* * SRCU read-side critical sections are normally short, @@ -557,14 +572,14 @@ static void srcu_invoke_callbacks(struct srcu_struct *sp) static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) { bool pending = true; + int state; if (rcu_segcblist_empty(&sp->srcu_cblist)) { spin_lock_irq(&sp->queue_lock); + state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); if (rcu_segcblist_empty(&sp->srcu_cblist) && - READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) { - WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE); + state == SRCU_STATE_IDLE) pending = false; - } spin_unlock_irq(&sp->queue_lock); } -- 2.5.2