Date: Fri, 29 Sep 2017 13:11:26 +0200
From: Peter Zijlstra
To: "Levin, Alexander (Sasha Levin)"
Cc: Sasha Levin, Ingo Molnar, Thomas Gleixner, "linux-kernel@vger.kernel.org List"
Subject: Re: [PATCH 3/4] sched: WARN when migrating to an offline CPU
Message-ID: <20170929111126.xdqcgye7iyvphb6a@hirez.programming.kicks-ass.net>
References: <20170907150349.063852465@infradead.org>
 <20170907150614.094206976@infradead.org>
 <20170928103541.2adtamm7koi4k5zh@hirez.programming.kicks-ass.net>
 <20170928110308.lvpxf5fw7crbwsg6@sasha-lappy>
 <20170928114249.qddcf4pntvuttswd@hirez.programming.kicks-ass.net>
In-Reply-To: <20170928114249.qddcf4pntvuttswd@hirez.programming.kicks-ass.net>

On Thu, Sep 28, 2017 at 01:42:49PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 28, 2017 at 11:03:10AM +0000, Levin, Alexander (Sasha Levin) wrote:
> > On Thu, Sep 28, 2017 at 12:35:41PM +0200, Peter Zijlstra wrote:
> > > On Thu, Sep 28, 2017 at 02:14:15AM -0700, Sasha Levin wrote:
> > >
> > >> [2035565360.446794] Unregister pv shared memory for cpu 2
> > >> [2035565360.467930] numa_remove_cpu cpu 2 node 2: mask now 6
> > >> [2035565360.471431] ------------[ cut here ]------------
> > >> [2035565360.472548] WARNING: CPU: 2 PID: 24 at kernel/sched/core.c:1178 set_task_cpu (kernel/sched/core.c:1157)
> > >> [2035565360.473840] Modules linked in:
> > >> [2035565360.474632] CPU: 2 PID: 24 Comm: migration/2 Not tainted 4.14.0-rc2-next-20170927+ #252
> > >
> > > Urgh, weird. That really shouldn't happen. Can you easily reproduce?
> >
> > Looks like yes. Seems like it's enough to stress CPU hotplug + trinity.
>
> OK, I'll see if I can reproduce building kernels and hotplug stress.
> Otherwise I'll try and cook up some debug patches for you.

I can't seem to trigger :-( Can you please run with the below patch and:

  # echo 1 > /proc/sys/kernel/traceoff_on_warning

---
 kernel/sched/core.c |  3 +++
 kernel/sched/fair.c | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..c613f7756981 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5592,6 +5592,7 @@ int sched_cpu_activate(unsigned int cpu)
         struct rq_flags rf;
 
         set_cpu_active(cpu, true);
+        trace_printk("active: %d mask: %*pbl\n", cpu, cpumask_pr_args(cpu_active_mask));
 
         if (sched_smp_initialized) {
                 sched_domains_numa_masks_set(cpu);
@@ -5624,6 +5625,7 @@ int sched_cpu_deactivate(unsigned int cpu)
         int ret;
 
         set_cpu_active(cpu, false);
+        trace_printk("not-active: %d mask: %*pbl\n", cpu, cpumask_pr_args(cpu_active_mask));
         /*
          * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
          * users of this state to go away such that all new such users will
          * observe it.
          *
          * Do sync before park smpboot threads to take care the rcu boost case.
          */
         synchronize_rcu_mult(call_rcu, call_rcu_sched);
+        trace_printk("rcu-sync: %d\n", cpu);
 
         if (!sched_smp_initialized)
                 return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 70ba32e08a23..cb8f43a59f33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8150,8 +8150,11 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 .tasks          = LIST_HEAD_INIT(env.tasks),
         };
 
+        cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);
+        trace_printk("dst_cpu: %d cpus: %*pbl\n", this_cpu, cpumask_pr_args(cpus));
+
         schedstat_inc(sd->lb_count[idle]);
 
 redo:
@@ -8248,6 +8251,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
                 env.dst_rq      = cpu_rq(env.new_dst_cpu);
                 env.dst_cpu     = env.new_dst_cpu;
+
+                trace_printk("dst_cpu: %d\n", env.dst_cpu);
+
                 env.flags      &= ~LBF_DST_PINNED;
                 env.loop        = 0;
                 env.loop_break  = sched_nr_migrate_break;
@@ -8465,6 +8471,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 
         update_blocked_averages(this_cpu);
         rcu_read_lock();
+        trace_printk("rcu-read-lock: %d\n", this_cpu);
         for_each_domain(this_cpu, sd) {
                 int continue_balancing = 1;
                 u64 t0, domain_cost;
@@ -8500,6 +8507,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
                 if (pulled_task || this_rq->nr_running > 0)
                         break;
         }
+        trace_printk("rcu-read-unlock: %d\n", this_cpu);
         rcu_read_unlock();
 
         raw_spin_lock(&this_rq->lock);
@@ -8790,6 +8798,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 
         update_blocked_averages(cpu);
         rcu_read_lock();
+        trace_printk("rcu-read-lock: %d\n", cpu);
         for_each_domain(cpu, sd) {
                 /*
                  * Decay the newidle max times here because this is a regular
@@ -8853,6 +8862,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                 rq->max_idle_balance_cost =
                         max((u64)sysctl_sched_migration_cost, max_cost);
         }
+        trace_printk("rcu-read-unlock: %d\n", cpu);
         rcu_read_unlock();
 
         /*
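
For context, the WARN at kernel/sched/core.c:1178 quoted above is the sanity
check this very series adds to set_task_cpu(). A minimal sketch of its shape,
assuming the usual CONFIG_SCHED_DEBUG guard; this is approximate, not the
verbatim upstream lines:

        #ifdef CONFIG_SCHED_DEBUG
                /*
                 * In set_task_cpu() (sketch): migrating a task to a CPU that
                 * is no longer online is a bug; a check of this shape is what
                 * produced the splat in Sasha's log above.
                 */
                WARN_ON_ONCE(!cpu_online(new_cpu));
        #endif

The debug patch pairs its trace_printk() calls with traceoff_on_warning so
that, when the WARN fires, the trace buffer stops right at the failure and
preserves the cpu_active_mask transitions and load-balance decisions leading
up to it.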