Message-ID: <1437292973.3505.83.camel@gmail.com>
Subject: Re: [RFC] workqueue: avoiding unbounded wq on isolated CPUs by
 default
From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tejun Heo <tj@kernel.org>, Daniel Bristot de Oliveira <bristot@redhat.com>,
        LKML <linux-kernel@vger.kernel.org>,
        Lai Jiangshan <jiangshanlai@gmail.com>, Rik van Riel <riel@redhat.com>,
        "Luis Claudio R. Goncalves" <lclaudio@uudg.org>
Date: Sun, 19 Jul 2015 10:02:53 +0200
In-Reply-To: <20150718133602.GA3041@lerouge>
References: <9e53de7c91c885ee255e16ee25f401d9eedf08d9.1437067317.git.bristot@redhat.com>
	 <20150716192448.GY15934@mtj.duckdns.org>
	 <1437107190.3438.23.camel@gmail.com>
	 <20150717152720.GD15934@mtj.duckdns.org>
	 <1437153348.5860.32.camel@gmail.com> <20150718133602.GA3041@lerouge>
Content-Type: text/plain; charset="UTF-8"
Mime-Version: 1.0
Content-Transfer-Encoding: 7bit
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 3535
Lines: 113

On Sat, 2015-07-18 at 15:36 +0200, Frederic Weisbecker wrote:

> But we can't leave it half-way like it is currently with everything preset on
> top of nohz: rcu nocb mask, watchdog mask, cpu_isolation_map and exclude workqueue.

To automate or not aside...

WRT wq_unbound_cpumask, it's very nice to have, but anyone watching their
box should notice generic, allegedly unbound work landing on the bound
system_wq, so the quiet zone isn't protected from these work items.

For example, my little perturbation measurement proggy emits a stat line
periodically, which leads to tty_schedule_flip() -> schedule_work(), so
it perturbs itself, seemingly needlessly.  Lord knows how many other
ways there are to do the same.

The hack below is not intended to be anything remotely resembling a
proper answer to that problem; it's my box encouraging me to ask the
question by surviving (modulo destroy_workqueue(), where redirecting
is a bad idea).

Why do we do nothing about these allegedly unbound work items?

---
 include/linux/sched.h |    2 ++
 kernel/workqueue.c    |   24 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1430,6 +1430,8 @@ struct task_struct {
 	unsigned sched_contributes_to_load:1;
 	unsigned sched_migrated:1;
 
+	unsigned work_redirect_disable:1;
+
 #ifdef CONFIG_MEMCG_KMEM
 	unsigned memcg_kmem_skip_account:1;
 #endif
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1294,6 +1294,21 @@ static bool is_chained_work(struct workq
 	return worker && worker->current_pwq->wq == wq;
 }
 
+static struct workqueue_struct *
+redirect_generic_unbound_work(int cpu, struct workqueue_struct *wq)
+{
+	if (cpu != WORK_CPU_UNBOUND || wq != system_wq)
+		return wq;
+	if (current->work_redirect_disable)
+		return wq;
+	if (cpumask_test_cpu(raw_smp_processor_id(), wq_unbound_cpumask))
+		return wq;
+	if (wq->flags & __WQ_DRAINING || system_unbound_wq->flags & __WQ_DRAINING)
+		return wq;
+
+	return system_unbound_wq;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -1317,6 +1332,7 @@ static void __queue_work(int cpu, struct
 	if (unlikely(wq->flags & __WQ_DRAINING) &&
 	    WARN_ON_ONCE(!is_chained_work(wq)))
 		return;
+	wq = redirect_generic_unbound_work(req_cpu, wq);
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
 		cpu = raw_smp_processor_id();
@@ -3926,6 +3942,8 @@ void destroy_workqueue(struct workqueue_
 	struct pool_workqueue *pwq;
 	int node;
 
+	current->work_redirect_disable = 1;
+
 	/* drain it before proceeding with destruction */
 	drain_workqueue(wq);
 
@@ -3937,7 +3955,7 @@ void destroy_workqueue(struct workqueue_
 		for (i = 0; i < WORK_NR_COLORS; i++) {
 			if (WARN_ON(pwq->nr_in_flight[i])) {
 				mutex_unlock(&wq->mutex);
-				return;
+				goto out;
 			}
 		}
 
@@ -3945,7 +3963,7 @@ void destroy_workqueue(struct workqueue_
 		    WARN_ON(pwq->nr_active) ||
 		    WARN_ON(!list_empty(&pwq->delayed_works))) {
 			mutex_unlock(&wq->mutex);
-			return;
+			goto out;
 		}
 	}
 	mutex_unlock(&wq->mutex);
@@ -3991,6 +4009,8 @@ void destroy_workqueue(struct workqueue_
 		wq->dfl_pwq = NULL;
 		put_pwq_unlocked(pwq);
 	}
+out:
+	current->work_redirect_disable = 0;
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/