From: Lai Jiangshan <laijs@cn.fujitsu.com>
To: <linux-kernel@vger.kernel.org>
CC: Lai Jiangshan <laijs@cn.fujitsu.com>, Tejun Heo <tj@kernel.org>
Subject: [PATCH 3/5] workqueue: ensure attrs-changing be sequentially
Date: Mon, 11 May 2015 17:35:50 +0800
Message-ID: <1431336953-3260-4-git-send-email-laijs@cn.fujitsu.com>
In-Reply-To: <1431336953-3260-1-git-send-email-laijs@cn.fujitsu.com>
References: <1431336953-3260-1-git-send-email-laijs@cn.fujitsu.com>
MIME-Version: 1.0
Content-Type: text/plain
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 6907
Lines: 230

Currently, modification of attrs via sysfs is not atomic.

Process A (change cpumask)	| Process B (change numa affinity)
wq_cpumask_store()		|
  wq_sysfs_prep_attrs()		|
				| apply_workqueue_attrs()
  apply_workqueue_attrs()	|

As a result, Process B's change is completely and silently reverted:
Process A applies attrs that were copied before B's change took effect.

This behavior is acceptable, but it is sometimes unexpected.  A sequential
model is more common and is preferred for operations that are not
performance-sensitive.  So this patch moves wq_sysfs_prep_attrs() under the
protection of wq_pool_mutex to ensure that attrs changes are sequential.

This patch is also a preparation for the next patch, which changes
the API of apply_workqueue_attrs().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 kernel/workqueue.c | 110 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 69 insertions(+), 41 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e7accc1..efd9a3a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3646,24 +3646,25 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
 	mutex_unlock(&ctx->wq->mutex);
 }
 
-/**
- * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
- * @wq: the target workqueue
- * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
- *
- * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
- * machines, this function maps a separate pwq to each NUMA node with
- * possibles CPUs in @attrs->cpumask so that work items are affine to the
- * NUMA node it was issued on.  Older pwqs are released as in-flight work
- * items finish.  Note that a work item which repeatedly requeues itself
- * back-to-back will stay on its current pwq.
- *
- * Performs GFP_KERNEL allocations.
- *
- * Return: 0 on success and -errno on failure.
- */
-int apply_workqueue_attrs(struct workqueue_struct *wq,
-			  const struct workqueue_attrs *attrs)
+static void apply_wqattrs_lock(void)
+{
+	/*
+	 * CPUs should stay stable across pwq creations and installations.
+	 * Pin CPUs, determine the target cpumask for each node and create
+	 * pwqs accordingly.
+	 */
+	get_online_cpus();
+	mutex_lock(&wq_pool_mutex);
+}
+
+static void apply_wqattrs_unlock(void)
+{
+	mutex_unlock(&wq_pool_mutex);
+	put_online_cpus();
+}
+
+static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
+					const struct workqueue_attrs *attrs)
 {
 	struct apply_wqattrs_ctx *ctx;
 	int ret = -ENOMEM;
@@ -3676,14 +3677,6 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
 	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
 		return -EINVAL;
 
-	/*
-	 * CPUs should stay stable across pwq creations and installations.
-	 * Pin CPUs, determine the target cpumask for each node and create
-	 * pwqs accordingly.
-	 */
-	get_online_cpus();
-	mutex_lock(&wq_pool_mutex);
-
 	ctx = apply_wqattrs_prepare(wq, attrs);
 
 	/* the ctx has been prepared successfully, let's commit it */
@@ -3694,8 +3687,33 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
 
 	apply_wqattrs_cleanup(ctx);
 
-	mutex_unlock(&wq_pool_mutex);
-	put_online_cpus();
+	return ret;
+}
+
+/**
+ * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
+ * @wq: the target workqueue
+ * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
+ *
+ * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
+ * machines, this function maps a separate pwq to each NUMA node with
+ * possibles CPUs in @attrs->cpumask so that work items are affine to the
+ * NUMA node it was issued on.  Older pwqs are released as in-flight work
+ * items finish.  Note that a work item which repeatedly requeues itself
+ * back-to-back will stay on its current pwq.
+ *
+ * Performs GFP_KERNEL allocations.
+ *
+ * Return: 0 on success and -errno on failure.
+ */
+int apply_workqueue_attrs(struct workqueue_struct *wq,
+			  const struct workqueue_attrs *attrs)
+{
+	int ret;
+
+	apply_wqattrs_lock();
+	ret = apply_workqueue_attrs_locked(wq, attrs);
+	apply_wqattrs_unlock();
 
 	return ret;
 }
@@ -4784,10 +4802,9 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
 	if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
 		return -ENOMEM;
 
-	get_online_cpus();
 	cpumask_and(cpumask, cpumask, cpu_possible_mask);
 	if (!cpumask_empty(cpumask)) {
-		mutex_lock(&wq_pool_mutex);
+		apply_wqattrs_lock();
 
 		/* save the old wq_unbound_cpumask. */
 		cpumask_copy(saved_cpumask, wq_unbound_cpumask);
@@ -4800,9 +4817,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
 		if (ret < 0)
 			cpumask_copy(wq_unbound_cpumask, saved_cpumask);
 
-		mutex_unlock(&wq_pool_mutex);
+		apply_wqattrs_unlock();
 	}
-	put_online_cpus();
 
 	free_cpumask_var(saved_cpumask);
 	return ret;
@@ -4927,18 +4943,22 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
 {
 	struct workqueue_struct *wq = dev_to_wq(dev);
 	struct workqueue_attrs *attrs;
-	int ret;
+	int ret = -ENOMEM;
+
+	apply_wqattrs_lock();
 
 	attrs = wq_sysfs_prep_attrs(wq);
 	if (!attrs)
-		return -ENOMEM;
+		goto out_unlock;
 
 	if (sscanf(buf, "%d", &attrs->nice) == 1 &&
 	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
-		ret = apply_workqueue_attrs(wq, attrs);
+		ret = apply_workqueue_attrs_locked(wq, attrs);
 	else
 		ret = -EINVAL;
 
+out_unlock:
+	apply_wqattrs_unlock();
 	free_workqueue_attrs(attrs);
 	return ret ?: count;
 }
@@ -4962,16 +4982,20 @@ static ssize_t wq_cpumask_store(struct device *dev,
 {
 	struct workqueue_struct *wq = dev_to_wq(dev);
 	struct workqueue_attrs *attrs;
-	int ret;
+	int ret = -ENOMEM;
+
+	apply_wqattrs_lock();
 
 	attrs = wq_sysfs_prep_attrs(wq);
 	if (!attrs)
-		return -ENOMEM;
+		goto out_unlock;
 
 	ret = cpumask_parse(buf, attrs->cpumask);
 	if (!ret)
-		ret = apply_workqueue_attrs(wq, attrs);
+		ret = apply_workqueue_attrs_locked(wq, attrs);
 
+out_unlock:
+	apply_wqattrs_unlock();
 	free_workqueue_attrs(attrs);
 	return ret ?: count;
 }
@@ -4995,18 +5019,22 @@ static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
 {
 	struct workqueue_struct *wq = dev_to_wq(dev);
 	struct workqueue_attrs *attrs;
-	int v, ret;
+	int v, ret = -ENOMEM;
+
+	apply_wqattrs_lock();
 
 	attrs = wq_sysfs_prep_attrs(wq);
 	if (!attrs)
-		return -ENOMEM;
+		goto out_unlock;
 
 	ret = -EINVAL;
 	if (sscanf(buf, "%d", &v) == 1) {
 		attrs->no_numa = !v;
-		ret = apply_workqueue_attrs(wq, attrs);
+		ret = apply_workqueue_attrs_locked(wq, attrs);
 	}
 
+out_unlock:
+	apply_wqattrs_unlock();
 	free_workqueue_attrs(attrs);
 	return ret ?: count;
 }
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/