Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753209AbdCNPHC (ORCPT ); Tue, 14 Mar 2017 11:07:02 -0400 Received: from Galois.linutronix.de ([146.0.238.70]:42578 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752199AbdCNPG7 (ORCPT ); Tue, 14 Mar 2017 11:06:59 -0400 Date: Tue, 14 Mar 2017 16:06:45 +0100 From: Sebastian Andrzej Siewior To: Bart Van Assche Cc: "tglx@linutronix.de" , "torvalds@linux-foundation.org" , "mingo@kernel.org" , "linux-kernel@vger.kernel.org" , "hpa@zytor.com" , "akpm@linux-foundation.org" Subject: [PATCH] cpu/hotplug: Serialize callback invocations proper Message-ID: <20170314150645.g4tdyoszlcbajmna@linutronix.de> References: <1488851515.6858.2.camel@sandisk.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <1488851515.6858.2.camel@sandisk.com> User-Agent: NeoMutt/20170113 (1.7.2) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 3919 Lines: 140 The setup/remove_state/instance() functions in the hotplug core code are serialized against concurrent CPU hotplug, but unfortunately not serialized against themself. As a consequence a concurrent invocation of these function results in corruption of the callback machinery because two instances try to invoke callbacks on remote cpus at the same time. This results in missing callback invocations and initiator threads waiting forever on the completion. The obvious solution to replace get_cpu_online() with cpu_hotplug_begin() is not possible because at least one callsite calls into these functions from a get_online_cpu() locked region. Extend the protection scope of the cpuhp_state_mutex from solely protecting the state arrays to cover the callback invocation machinery as well. Reported-by: Bart Van Assche Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface") Signed-off-by: Sebastian Andrzej Siewior --- kernel/cpu.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cp struct cpuhp_step *sp; int ret = 0; - mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) - goto out; + return ret; state = ret; } sp = cpuhp_get_step(state); - if (name && sp->name) { - ret = -EBUSY; - goto out; - } + if (name && sp->name) + return -EBUSY; + sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); -out: - mutex_unlock(&cpuhp_state_mutex); return ret; } @@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuh return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; @@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuh if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); - goto err; + goto unlock; } } add_node: ret = 0; - mutex_lock(&cpuhp_state_mutex); hlist_add_head(node, &sp->list); +unlock: mutex_unlock(&cpuhp_state_mutex); - -err: put_online_cpus(); return ret; } @@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); @@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state } } out: + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); /* * If the requested state is CPUHP_AP_ONLINE_DYN, return the @@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum c return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); + if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* @@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum c } remove: - mutex_lock(&cpuhp_state_mutex); hlist_del(node); mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); @@ -1571,6 +1568,7 @@ int __cpuhp_state_remove_instance(enum c return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); + /** * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state * @state: The state to remove @@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_sta get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", @@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_sta } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); } EXPORT_SYMBOL(__cpuhp_remove_state);