From: Lai Jiangshan
Cc: Lai Jiangshan, Tejun Heo
Subject: [PATCH 7/7 V2] workqueue: add get_node_unbound_pwq()
Date: Tue, 12 May 2015 20:32:35 +0800
Message-ID: <1431433955-3173-8-git-send-email-laijs@cn.fujitsu.com>
X-Mailer: git-send-email 2.1.0
In-Reply-To: <1431433955-3173-1-git-send-email-laijs@cn.fujitsu.com>
References: <1431433955-3173-1-git-send-email-laijs@cn.fujitsu.com>

The per-node pwq calculation and allocation code in
apply_workqueue_attrs() and wq_update_unbound_numa() is highly similar,
so merge it into a new helper, get_node_unbound_pwq().
wq_calc_node_cpumask() is used only for this calculation and
allocation, so it is folded into get_node_unbound_pwq() as well.

The only behavioral change is added overhead: any reuse of an old pwq
now goes through get_pwq()/put_pwq() and the corresponding locking.
apply_wqattrs_prepare() behaves as before, but wq_update_unbound_numa()
changes when the node's current pwq is reused: compared with the old
code, it performs three extra pairs of lock()/unlock() operations when
the pwq is unchanged. CPU hotplug is a cold path, but the unchanged-pwq
case is the common one during hotplug.

Signed-off-by: Lai Jiangshan
---
 kernel/workqueue.c | 126 ++++++++++++++++++++---------------------------------
 1 file changed, 47 insertions(+), 79 deletions(-)
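Reviewer notes, not part of the commit message:

The new helper reduces to the following decision ladder; this is a
summary of the hunk below, not literal kernel code:

        NUMA affinity disabled (globally or for this wq)  -> reuse @dfl_pwq
        @node has no usable online CPUs                   -> reuse @dfl_pwq
        node cpumask equals the default pwq's cpumask     -> reuse @dfl_pwq
        node cpumask matches the installed node pwq       -> reuse that pwq
        otherwise                                         -> alloc_unbound_pwq()

The three extra lock()/unlock() pairs mentioned above come from the
reuse path of wq_update_unbound_numa(). A sketch of the unchanged-pwq
case after this patch, assuming get_pwq_unlocked()/put_pwq_unlocked()
take pwq->pool->lock internally, as their names suggest:

        /* pair 1: pool->lock inside get_pwq_unlocked(), on the reused pwq */
        pwq = get_node_unbound_pwq(wq, true, node, cpu_off, wq->dfl_pwq);

        /* pair 2: wq->mutex around the numa_pwq_tbl[] update */
        mutex_lock(&wq->mutex);
        old_pwq = numa_pwq_tbl_install(wq, node, pwq);
        mutex_unlock(&wq->mutex);

        /* pair 3: pool->lock inside put_pwq_unlocked(), dropping the old ref */
        put_pwq_unlocked(old_pwq);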
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6426d6e..ed228ef64 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -295,7 +295,7 @@ module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
 static bool wq_numa_enabled;            /* unbound NUMA affinity enabled */
 
-/* PL: buf for apply_wqattrs_prepare() and wq_update_unbound_numa() */
+/* PL: buf for get_node_unbound_pwq() */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);     /* protects pools and workqueues list */
@@ -3458,32 +3458,45 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
 }
 
 /**
- * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
- * @attrs: the wq_attrs of the default pwq of the target workqueue
+ * get_node_unbound_pwq - get a pwq for the specified node
+ * @wq: the target workqueue
+ * @numa: whether NUMA affinity is requested for @wq
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
- * @cpumask: outarg, the resulting cpumask
+ * @dfl_pwq: the allocated default pwq
 *
- * Calculate the cpumask a workqueue with @attrs should use on @node.  If
- * @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation.  The result is stored in @cpumask.
+ * Allocate or reuse a pwq with the cpumask that @wq should use on @node.
 *
- * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
- * enabled and @node has online CPUs requested by @attrs, the returned
- * cpumask is the intersection of the possible CPUs of @node and
- * @attrs->cpumask.
+ * If NUMA affinity is not enabled, @dfl_pwq is always used.  If it is
+ * enabled and @node has online CPUs requested by @dfl_pwq->pool->attrs,
+ * the cpumask is the intersection of the possible CPUs of @node and
+ * the cpumask of @dfl_pwq->pool->attrs.  If @cpu_going_down is >= 0,
+ * that CPU is considered offline during the calculation.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
- * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
- * %false if equal.
+ * Return: a valid pwq; it may be @dfl_pwq under the conditions above,
+ * or the current pwq of @node.
+ * Returns NULL when the pwq allocation failed.
 */
-static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
-                                 int cpu_going_down, cpumask_t *cpumask)
+static struct pool_workqueue *
+get_node_unbound_pwq(struct workqueue_struct *wq, bool numa, int node,
+                     int cpu_going_down, struct pool_workqueue *dfl_pwq)
 {
-       if (!wq_numa_enabled || attrs->no_numa)
-               goto use_dfl;
+       struct pool_workqueue *pwq = unbound_pwq_by_node(wq, node);
+       const struct workqueue_attrs *attrs = dfl_pwq->pool->attrs;
+       struct workqueue_attrs *tmp_attrs = wq_update_unbound_numa_attrs_buf;
+       cpumask_t *cpumask;
+
+       lockdep_assert_held(&wq_pool_mutex);
+
+       if (!wq_numa_enabled || !numa)
+               return get_pwq_unlocked(dfl_pwq);
+
+       copy_workqueue_attrs(tmp_attrs, attrs);
+       cpumask = tmp_attrs->cpumask;
 
        /* does @node have any online CPUs @attrs wants? */
        cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
@@ -3491,15 +3504,18 @@ static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
                cpumask_clear_cpu(cpu_going_down, cpumask);
 
        if (cpumask_empty(cpumask))
-               goto use_dfl;
+               return get_pwq_unlocked(dfl_pwq);
 
-       /* yeap, return possible CPUs in @node that @attrs wants */
+       /* yeap, use possible CPUs in @node that @attrs wants */
        cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
-       return !cpumask_equal(cpumask, attrs->cpumask);
+       if (cpumask_equal(cpumask, attrs->cpumask))
+               return get_pwq_unlocked(dfl_pwq);
 
-use_dfl:
-       cpumask_copy(cpumask, attrs->cpumask);
-       return false;
+       /* try to reuse the current pwq */
+       if (pwq && wqattrs_equal(tmp_attrs, pwq->pool->attrs))
+               return get_pwq_unlocked(pwq);
+
+       return alloc_unbound_pwq(wq, tmp_attrs);
 }
 
 /* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
@@ -3551,7 +3567,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                      const struct workqueue_attrs *attrs)
 {
        struct apply_wqattrs_ctx *ctx;
-       struct workqueue_attrs *new_attrs, *tmp_attrs;
+       struct workqueue_attrs *new_attrs;
        struct pool_workqueue *pwq;
        int node;
 
@@ -3565,12 +3581,6 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                goto out_free;
 
        /*
-        * We don't need to alloc/free temporary attrs. Let's use a
-        * preallocated one. The following buf is protected by wq_pool_mutex.
-        */
-       tmp_attrs = wq_update_unbound_numa_attrs_buf;
-
-       /*
         * Calculate the attrs of the default pwq.
         * If the user configured cpumask doesn't overlap with the
         * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
@@ -3581,13 +3591,6 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
        cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
 
        /*
-        * We may create multiple pwqs with differing cpumasks.  Make a
-        * copy of @new_attrs which will be modified and used to obtain
-        * pools.
-        */
-       copy_workqueue_attrs(tmp_attrs, new_attrs);
-
-       /*
         * If something goes wrong during CPU up/down, we'll fall back to
         * the default pwq covering whole @attrs->cpumask.  Always create
         * it even if we don't use it immediately.
@@ -3600,18 +3603,11 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                goto out_free;
 
        for_each_node(node) {
-               if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
-                       pwq = unbound_pwq_by_node(wq, node);
-                       if (pwq && wqattrs_equal(tmp_attrs, pwq->pool->attrs))
-                               pwq = get_pwq_unlocked(pwq);
-                       else
-                               pwq = alloc_unbound_pwq(wq, tmp_attrs);
-                       if (!pwq)
-                               goto out_free;
-                       ctx->pwq_tbl[node] = pwq;
-               } else {
-                       ctx->pwq_tbl[node] = get_pwq_unlocked(ctx->dfl_pwq);
-               }
+               pwq = get_node_unbound_pwq(wq, !attrs->no_numa, node, -1,
+                                          ctx->dfl_pwq);
+               if (!pwq)
+                       goto out_free;
+               ctx->pwq_tbl[node] = pwq;
        }
 
        /* save the user configured attrs and sanitize it. */
@@ -3731,9 +3727,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
 {
        int node = cpu_to_node(cpu);
        int cpu_off = online ? -1 : cpu;
-       struct pool_workqueue *old_pwq = NULL, *pwq;
-       struct workqueue_attrs *target_attrs;
-       cpumask_t *cpumask;
+       struct pool_workqueue *old_pwq, *pwq;
 
        lockdep_assert_held(&wq_pool_mutex);
 
@@ -3741,40 +3735,14 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
            wq->unbound_attrs->no_numa)
                return;
 
-       /*
-        * We don't wanna alloc/free wq_attrs for each wq for each CPU.
-        * Let's use a preallocated one.  The following buf is protected by
-        * wq_pool_mutex.
-        */
-       target_attrs = wq_update_unbound_numa_attrs_buf;
-       cpumask = target_attrs->cpumask;
-
-       copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
-
-       /*
-        * Let's determine what needs to be done.  If the target cpumask is
-        * different from the default pwq's, we need to compare it to @pwq's
-        * and create a new one if they don't match.  If the target cpumask
-        * equals the default pwq's, the default pwq should be used.
-        */
-       if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
-               pwq = unbound_pwq_by_node(wq, node);
-               if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
-                       return;
-       } else {
-               pwq = get_pwq_unlocked(wq->dfl_pwq);
-               goto install;
-       }
-
        /* create a new pwq */
-       pwq = alloc_unbound_pwq(wq, target_attrs);
+       pwq = get_node_unbound_pwq(wq, true, node, cpu_off, wq->dfl_pwq);
        if (!pwq) {
                pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
                        wq->name);
                pwq = get_pwq_unlocked(wq->dfl_pwq);
        }
 
-install:
        /* Install the new pwq. */
        mutex_lock(&wq->mutex);
        old_pwq = numa_pwq_tbl_install(wq, node, pwq);
-- 
2.1.0