From: <lianglihao@huawei.com>
Subject: [PATCH RFC 13/16] prcu: Comment source code
Date: Tue, 23 Jan 2018 15:59:38 +0800
Message-ID: <1516694381-20333-14-git-send-email-lianglihao@huawei.com>
In-Reply-To: <1516694381-20333-1-git-send-email-lianglihao@huawei.com>
References: <1516694381-20333-1-git-send-email-lianglihao@huawei.com>
X-Mailer: git-send-email 1.7.12.4
X-Mailing-List: linux-kernel@vger.kernel.org

From: Lihao Liang <lianglihao@huawei.com>

Signed-off-by: Lihao Liang <lianglihao@huawei.com>
---
 include/linux/prcu.h |  73 ++++++++++++++++-----
 kernel/rcu/prcu.c    | 178 +++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 225 insertions(+), 26 deletions(-)

diff --git a/include/linux/prcu.h b/include/linux/prcu.h
index bb20fa40..9f740985 100644
--- a/include/linux/prcu.h
+++ b/include/linux/prcu.h
@@ -1,3 +1,11 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (PRCU version).
+ * PRCU public definitions.
+ *
+ * Authors: Heng Zhang
+ *          Lihao Liang
+ */
+
 #ifndef __LINUX_PRCU_H
 #define __LINUX_PRCU_H
 
@@ -8,12 +16,26 @@
 #include 
 
 #ifdef CONFIG_PRCU
+
+/*
+ * Simple list structure of callback versions.
+ *
+ * Note: Ideally, we would like to add the version field
+ * to the rcu_head struct. But if we do so, other users of
+ * rcu_head in the Linux kernel will complain hard and loudly.
+ */
 struct prcu_version_head {
        unsigned long long version;
        struct prcu_version_head *next;
 };
 
-/* Simple unsegmented callback list for PRCU. */
+/*
+ * Simple unsegmented callback list for PRCU.
+ *
+ * Note: Since we can't add a new version field to rcu_head,
+ * we have to make our own callback list for PRCU instead of
+ * using the existing rcu_cblist. Sigh!
+ */
 struct prcu_cblist {
        struct rcu_head *head;
        struct rcu_head **tail;
@@ -27,31 +49,47 @@ struct prcu_cblist {
        .version_head = NULL, .version_tail = &n.version_head, \
 }
 
+/*
+ * PRCU's per-CPU state.
+ */
 struct prcu_local_struct {
-       unsigned int locked;
-       unsigned int online;
-       unsigned long long version;
-       unsigned long long cb_version;
-       struct rcu_head barrier_head;
-       struct prcu_cblist cblist;
+       unsigned int locked;            /* Nesting level of PRCU read-side */
+                                       /*  critical sections */
+       unsigned int online;            /* Indicates whether a context-switch */
+                                       /*  has occurred on this CPU */
+       unsigned long long version;     /* Local grace-period version */
+       unsigned long long cb_version;  /* Local callback version */
+       struct rcu_head barrier_head;   /* Used by prcu_barrier() */
+       struct prcu_cblist cblist;      /* PRCU callback list */
 };
 
+/*
+ * PRCU's global state.
+ */
 struct prcu_struct {
-       atomic64_t global_version;
-       atomic64_t cb_version;
-       atomic_t active_ctr;
-       atomic_t barrier_cpu_count;
-       struct mutex mtx;
-       struct mutex barrier_mtx;
-       wait_queue_head_t wait_q;
-       struct completion barrier_completion;
+       atomic64_t global_version;      /* Global grace-period version */
+       atomic64_t cb_version;          /* Global callback version */
+       atomic_t active_ctr;            /* Outstanding PRCU tasks */
+                                       /*  being context-switched */
+       atomic_t barrier_cpu_count;     /* # CPUs waiting on prcu_barrier() */
+       struct mutex mtx;               /* Serialize synchronize_prcu() */
+       struct mutex barrier_mtx;       /* Serialize prcu_barrier() */
+       wait_queue_head_t wait_q;       /* Wait for synchronize_prcu() */
+       struct completion barrier_completion; /* Wait for prcu_barrier() */
 };
 
+/*
+ * PRCU APIs.
+ */
 void prcu_read_lock(void);
 void prcu_read_unlock(void);
 void synchronize_prcu(void);
 void call_prcu(struct rcu_head *head, rcu_callback_t func);
 void prcu_barrier(void);
+
+/*
+ * Internal non-public functions.
+ */
 void prcu_init(void);
 void prcu_note_context_switch(void);
 int prcu_pending(void);
@@ -60,11 +98,16 @@ void prcu_check_callbacks(void);
 
 #else /* #ifdef CONFIG_PRCU */
 
+/*
+ * If CONFIG_PRCU is not defined,
+ * map its APIs to RCU's counterparts.
+ */
 #define prcu_read_lock rcu_read_lock
 #define prcu_read_unlock rcu_read_unlock
 #define synchronize_prcu synchronize_rcu
 #define call_prcu call_rcu
 #define prcu_barrier rcu_barrier
+
 #define prcu_init() do {} while (0)
 #define prcu_note_context_switch() do {} while (0)
 #define prcu_pending() 0

diff --git a/kernel/rcu/prcu.c b/kernel/rcu/prcu.c
index 49cb70e6..ef2c7730 100644
--- a/kernel/rcu/prcu.c
+++ b/kernel/rcu/prcu.c
@@ -1,3 +1,17 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (PRCU version).
+ * This PRCU implementation is based on a fast consensus protocol
+ * published in the following paper:
+ *
+ * Fast Consensus Using Bounded Staleness for Scalable Read-mostly Synchronization.
+ * Haibo Chen, Heng Zhang, Ran Liu, Binyu Zang, and Haibing Guan.
+ * IEEE Transactions on Parallel and Distributed Systems (TPDS), 2016.
+ * https://dl.acm.org/citation.cfm?id=3024114.3024143
+ *
+ * Authors: Heng Zhang
+ *          Lihao Liang
+ */
+
 #include 
 #include 
 #include 
@@ -8,8 +22,16 @@
 
 #include "rcu.h"
 
+/* Data structures. */
+
+/*
+ * Initialize PRCU's per-CPU local structure.
+ */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct prcu_local_struct, prcu_local);
 
+/*
+ * Initialize PRCU's global structure.
+ */
 struct prcu_struct global_prcu = {
        .global_version = ATOMIC64_INIT(0),
        .cb_version = ATOMIC64_INIT(0),
@@ -20,7 +42,9 @@ struct prcu_struct global_prcu = {
 };
 struct prcu_struct *prcu = &global_prcu;
 
-/* Initialize simple callback list. */
+/*
+ * Initialize simple PRCU callback list.
+ */
 static void prcu_cblist_init(struct prcu_cblist *rclp)
 {
        rclp->head = NULL;
@@ -31,8 +55,8 @@ static void prcu_cblist_init(struct prcu_cblist *rclp)
 }
 
 /*
- * Dequeue the oldest rcu_head structure from the specified callback list;
- * store the callback grace period version number into the version pointer.
+ * Dequeue the oldest rcu_head structure from the specified callback list.
+ * Store the callback version number into the version pointer.
  */
 static struct rcu_head *prcu_cblist_dequeue(struct prcu_cblist *rclp)
 {
@@ -59,6 +83,11 @@ static struct rcu_head *prcu_cblist_dequeue(struct prcu_cblist *rclp)
        return rhp;
 }
 
+/* PRCU function implementations. */
+
+/*
+ * Update local PRCU state of the current CPU.
+ */
 static inline void prcu_report(struct prcu_local_struct *local)
 {
        unsigned long long global_version;
@@ -70,6 +99,15 @@ static inline void prcu_report(struct prcu_local_struct *local)
        cmpxchg(&local->version, local_version, global_version);
 }
 
+/*
+ * Mark the beginning of a PRCU read-side critical section.
+ *
+ * A PRCU quiescent state of a CPU is when its local ->locked and
+ * ->online variables become 0.
+ *
+ * See prcu_read_unlock() and synchronize_prcu() for more information.
+ * Also see rcu_read_lock() comment header.
+ */
 void prcu_read_lock(void)
 {
        struct prcu_local_struct *local;
@@ -77,29 +115,50 @@ void prcu_read_lock(void)
        local = get_cpu_ptr(&prcu_local);
        if (!local->online) {
                WRITE_ONCE(local->online, 1);
+               /*
+                * Memory barrier is needed for PRCU writers
+                * to see the updated local->online value.
+                */
                smp_mb();
        }
-
        local->locked++;
+       /*
+        * Critical section after entry code.
+        * put_cpu_ptr() provides the needed barrier().
+        */
        put_cpu_ptr(&prcu_local);
 }
 EXPORT_SYMBOL(prcu_read_lock);
 
+/*
+ * Mark the end of a PRCU read-side critical section.
+ *
+ * See prcu_read_lock() and synchronize_prcu() for more information.
+ * Also see rcu_read_unlock() comment header.
+ */
 void prcu_read_unlock(void)
 {
        int locked;
        struct prcu_local_struct *local;
 
-       barrier();
+       barrier(); /* Critical section before exit code. */
        local = get_cpu_ptr(&prcu_local);
        locked = local->locked;
        if (locked) {
                local->locked--;
+               /*
+                * If we are executing the last PRCU task,
+                * update the CPU-local PRCU state.
+                */
                if (locked == 1)
                        prcu_report(local);
                put_cpu_ptr(&prcu_local);
        } else {
                put_cpu_ptr(&prcu_local);
+               /*
+                * If we are executing the last outstanding
+                * PRCU task, wake up synchronize_prcu().
+                */
                if (!atomic_dec_return(&prcu->active_ctr))
                        wake_up(&prcu->wait_q);
        }
@@ -111,10 +170,25 @@ static void prcu_handler(void *info)
        struct prcu_local_struct *local;
 
        local = this_cpu_ptr(&prcu_local);
+       /*
+        * We need to do this check locally on the current CPU
+        * because no memory barrier is used for ->locked so
+        * PRCU writers may not see its latest local value.
+        */
        if (!local->locked)
                WRITE_ONCE(local->version, atomic64_read(&prcu->global_version));
 }
 
+/*
+ * Wait until a grace period has completed.
+ *
+ * A PRCU grace period can end if each CPU has passed a PRCU quiescent state
+ * -and- the global variable ->active_ctr is 0, that is, all pre-existing
+ * PRCU read-side critical sections have completed.
+ *
+ * See prcu_read_lock() and prcu_read_unlock() for more information.
+ * Also see synchronize_rcu() comment header.
+ */
 void synchronize_prcu(void)
 {
        int cpu;
@@ -122,7 +196,13 @@ void synchronize_prcu(void)
        unsigned long long version;
        struct prcu_local_struct *local;
 
+       /*
+        * Get the new global grace-period version before taking the mutex,
+        * which allows multiple concurrent synchronize_prcu() calls to
+        * return in a timely fashion.
+        */
        version = atomic64_add_return(1, &prcu->global_version);
+       /* Take the mutex to serialize concurrent synchronize_prcu() calls. */
        mutex_lock(&prcu->mtx);
 
        local = get_cpu_ptr(&prcu_local);
@@ -130,8 +210,14 @@ void synchronize_prcu(void)
        put_cpu_ptr(&prcu_local);
 
        cpumask_clear(&cpus);
+       /* Send an IPI to force straggling CPUs to update their PRCU state. */
        for_each_possible_cpu(cpu) {
                local = per_cpu_ptr(&prcu_local, cpu);
+               /*
+                * If no PRCU tasks are currently running on this CPU
+                * or a context-switch has occurred, the CPU-local PRCU
+                * state has already been updated.
+                */
                if (!READ_ONCE(local->online))
                        continue;
                if (READ_ONCE(local->version) < version) {
@@ -140,34 +226,46 @@ void synchronize_prcu(void)
                }
        }
 
+       /* Wait for outstanding CPUs to commit. */
        for_each_cpu(cpu, &cpus) {
                local = per_cpu_ptr(&prcu_local, cpu);
                while (READ_ONCE(local->version) < version)
                        cpu_relax();
        }
 
+       /* Wait for outstanding PRCU tasks to finish. */
        if (atomic_read(&prcu->active_ctr))
                wait_event(prcu->wait_q, !atomic_read(&prcu->active_ctr));
-
+       /* Update the global callback version to its grace-period version. */
        atomic64_set(&prcu->cb_version, version);
        mutex_unlock(&prcu->mtx);
 }
 EXPORT_SYMBOL(synchronize_prcu);
 
+/*
+ * Update PRCU state when a context-switch occurs.
+ */
 void prcu_note_context_switch(void)
 {
        struct prcu_local_struct *local;
 
        local = get_cpu_ptr(&prcu_local);
+       /* Update local and global outstanding PRCU task number. */
        if (local->locked) {
                atomic_add(local->locked, &prcu->active_ctr);
                local->locked = 0;
        }
+       /* Indicate a context-switch has occurred on this CPU. */
        local->online = 0;
+       /* Update this CPU's local PRCU state. */
        prcu_report(local);
        put_cpu_ptr(&prcu_local);
 }
 
+/*
+ * Queue a PRCU callback to the current CPU for invocation
+ * after a grace period.
+ */
 void call_prcu(struct rcu_head *head, rcu_callback_t func)
 {
        unsigned long flags;
@@ -177,8 +275,12 @@ void call_prcu(struct rcu_head *head, rcu_callback_t func)
 
        debug_rcu_head_queue(head);
 
-       /* Use GFP_ATOMIC with IRQs disabled */
+       /* Use GFP_ATOMIC with IRQs disabled. */
        vhp = kmalloc(sizeof(struct prcu_version_head), GFP_ATOMIC);
+       /*
+        * Complain about kmalloc() failure. This could be handled
+        * in a different way, e.g. return -1 to inform the caller.
+        */
        if (!vhp) {
                WARN_ON(1);
                return;
@@ -188,8 +290,13 @@ void call_prcu(struct rcu_head *head, rcu_callback_t func)
        head->next = NULL;
        vhp->next = NULL;
 
+       /* Disable IRQs to prevent races with prcu_process_callbacks(). */
        local_irq_save(flags);
        local = this_cpu_ptr(&prcu_local);
+       /*
+        * Assign the CPU-local callback version to the given callback
+        * and add it to the PRCU callback list of the current CPU.
+        */
        vhp->version = local->version;
        rclp = &local->cblist;
        rclp->len++;
@@ -201,6 +308,13 @@ void call_prcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL(call_prcu);
 
+/*
+ * Check to see if there is any immediate PRCU-related work
+ * to be done by the current CPU, returning 1 if so.
+ *
+ * Currently, it only checks whether this CPU has callbacks
+ * that are ready to invoke.
+ */
 int prcu_pending(void)
 {
        struct prcu_local_struct *local = get_cpu_ptr(&prcu_local);
@@ -211,18 +325,33 @@ int prcu_pending(void)
        return cb_version < atomic64_read(&prcu->cb_version) && rclp->head;
 }
 
+/*
+ * Perform PRCU core processing for the current CPU using softirq.
+ */
 void invoke_prcu_core(void)
 {
        if (cpu_online(smp_processor_id()))
                raise_softirq(PRCU_SOFTIRQ);
 }
 
+/*
+ * Schedule PRCU core processing.
+ *
+ * This function must be called from hardirq context.
+ * It is normally invoked from the scheduling-clock interrupt.
+ */
 void prcu_check_callbacks(void)
 {
        if (prcu_pending())
                invoke_prcu_core();
 }
 
+/*
+ * Process PRCU callbacks whose grace period has completed.
+ * Do this using softirq for each CPU.
+ *
+ * Also see the prcu_barrier() comment header.
+ */
 static __latent_entropy void prcu_process_callbacks(struct softirq_action *unused)
 {
        unsigned long flags;
@@ -237,18 +366,24 @@ static __latent_entropy void prcu_process_callbacks(struct softirq_action *unused)
 
        cb_version = atomic64_read(&prcu->cb_version);
 
-       /* Disable interrupts to prevent races with call_prcu() */
+       /* Disable IRQs to prevent races with call_prcu(). */
        local_irq_save(flags);
        local = this_cpu_ptr(&prcu_local);
        rclp = &local->cblist;
        rhp = rclp->head;
        vhp = rclp->version_head;
+       /*
+        * Process PRCU callbacks with version number smaller
+        * than the global PRCU callback version, whose associated
+        * grace periods have completed.
+        */
        for (; rhp && vhp && vhp->version < cb_version;
             rhp = rclp->head, vhp = rclp->version_head) {
                rhp = prcu_cblist_dequeue(rclp);
                debug_rcu_head_unqueue(rhp);
                rhp->func(rhp);
        }
+       /* Record the version number up to which callbacks have been processed. */
        local->cb_version = cb_version;
        local_irq_restore(flags);
 }
@@ -274,7 +409,18 @@ static void prcu_barrier_func(void *info)
        call_prcu(&local->barrier_head, prcu_barrier_callback);
 }
 
-/* Waiting for all PRCU callbacks to complete. */
+/*
+ * Wait for all PRCU callbacks to complete.
+ *
+ * NOTE: The current PRCU implementation relies on synchronize_prcu()
+ * to update its global grace-period and callback version numbers.
+ * If there is no synchronize_prcu() running and call_prcu() is called,
+ * prcu_process_callbacks() won't make progress and prcu_barrier() will
+ * -not- return.
+ *
+ * This needs to be fixed, e.g. using a grace-period expediting mechanism
+ * as found in the Linux-kernel RCU implementation.
+ */
 void prcu_barrier(void)
 {
        int cpu;
@@ -292,9 +438,13 @@ void prcu_barrier(void)
 
        /*
         * Register a new callback on each CPU using IPI to prevent races
-        * with call_prcu(). When that callback is invoked, we will know
+        * with call_prcu().  When that callback is invoked, we will know
         * that all of the corresponding CPU's preceding callbacks have
-        * been invoked.
+        * been invoked. Note that we must use the wait version of
+        * smp_call_function_single(). Otherwise prcu_barrier_func()
+        * might not finish incrementing prcu->barrier_cpu_count and
+        * registering prcu_barrier_callback() on -each- CPU before
+        * we exit the loop and wait for completion. Hence a bug!
         */
        for_each_possible_cpu(cpu)
                smp_call_function_single(cpu, prcu_barrier_func, NULL, 1);
@@ -315,6 +465,9 @@ void prcu_barrier(void)
 }
 EXPORT_SYMBOL(prcu_barrier);
 
+/*
+ * Helper function for prcu_init() to initialize PRCU's CPU-local structure.
+ */
 void prcu_init_local_struct(int cpu)
 {
        struct prcu_local_struct *local;
@@ -327,6 +480,9 @@ void prcu_init_local_struct(int cpu)
        prcu_cblist_init(&local->cblist);
 }
 
+/*
+ * Initialize PRCU at boot time.
+ */
 void __init prcu_init(void)
 {
        int cpu;
-- 
2.14.1.729.g59c0ea183
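
P.S. To make review easier, here is a minimal usage sketch showing how the
commented APIs fit together. It is illustration only, not part of the patch:
struct foo, gp, foo_read(), foo_update(), and foo_reclaim() are made-up names
for this example; the prcu_*() calls are the APIs declared in
include/linux/prcu.h above, and the publish/subscribe helpers are the standard
RCU ones.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/prcu.h>

/* A PRCU-protected structure; the rcu_head is used by call_prcu(). */
struct foo {
        int a;
        struct rcu_head rh;
};

static struct foo __rcu *gp;    /* PRCU-protected global pointer */

/* Reader: accesses gp inside a PRCU read-side critical section. */
static int foo_read(void)
{
        struct foo *p;
        int val = -1;

        prcu_read_lock();
        p = rcu_dereference(gp);
        if (p)
                val = p->a;
        prcu_read_unlock();
        return val;
}

/* Invoked once a grace period has elapsed; frees the old version. */
static void foo_reclaim(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct foo, rh));
}

/* Updater: publish a new version, then defer freeing the old one. */
static void foo_update(int a)
{
        struct foo *new_fp, *old_fp;

        new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
        if (!new_fp)
                return;
        new_fp->a = a;
        /* Updates are assumed to be serialized by the caller. */
        old_fp = rcu_dereference_protected(gp, 1);
        rcu_assign_pointer(gp, new_fp);
        if (old_fp)
                call_prcu(&old_fp->rh, foo_reclaim);
}

As with call_rcu(), the deferred free above assumes grace periods keep
completing; per the prcu_barrier() comment in this patch, callbacks queued
with call_prcu() only advance when some synchronize_prcu() call updates the
global callback version. An updater that cannot tolerate that dependency can
instead call synchronize_prcu() and then kfree() the old version directly.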