From: Lai Jiangshan
Cc: Lai Jiangshan, Tejun Heo
Subject: [PATCH 7/7 V2] workqueue: add get_node_unbound_pwq()
Date: Tue, 12 May 2015 20:32:35 +0800
Message-ID: <1431433955-3173-8-git-send-email-laijs@cn.fujitsu.com>
X-Mailer: git-send-email 2.1.0
In-Reply-To: <1431433955-3173-1-git-send-email-laijs@cn.fujitsu.com>
References: <1431433955-3173-1-git-send-email-laijs@cn.fujitsu.com>

The per-node pwq calculation and allocation code in
apply_workqueue_attrs() and wq_update_unbound_numa() is highly similar,
so merge it into a new helper, get_node_unbound_pwq().
wq_calc_node_cpumask() is used only for this calculation and
allocation, so it is folded into get_node_unbound_pwq() as well.

The only behavioral change is added overhead: any reuse of an old pwq
now goes through get_pwq()/put_pwq() and the corresponding locking.
apply_wqattrs_prepare() behaves as before, but wq_update_unbound_numa()
changes when the node's current pwq is reused: compared with the old
code, it performs three extra pairs of lock()/unlock() operations when
the pwq is unchanged. CPU hotplug is a cold path, but the unchanged-pwq
case is the common one during hotplug.

Signed-off-by: Lai Jiangshan
---
 kernel/workqueue.c | 126 ++++++++++++++++++++---------------------------------
 1 file changed, 47 insertions(+), 79 deletions(-)
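Reviewer notes, not part of the commit message:

The new helper reduces to the following decision ladder; this is a
summary of the hunk below, not literal kernel code:

        NUMA affinity disabled (globally or for this wq)  -> reuse @dfl_pwq
        @node has no usable online CPUs                   -> reuse @dfl_pwq
        node cpumask equals the default pwq's cpumask     -> reuse @dfl_pwq
        node cpumask matches the installed node pwq       -> reuse that pwq
        otherwise                                         -> alloc_unbound_pwq()

The three extra lock()/unlock() pairs mentioned above come from the
reuse path of wq_update_unbound_numa(). A sketch of the unchanged-pwq
case after this patch, assuming get_pwq_unlocked()/put_pwq_unlocked()
take pwq->pool->lock internally, as their names suggest:

        /* pair 1: pool->lock inside get_pwq_unlocked(), on the reused pwq */
        pwq = get_node_unbound_pwq(wq, true, node, cpu_off, wq->dfl_pwq);

        /* pair 2: wq->mutex around the numa_pwq_tbl[] update */
        mutex_lock(&wq->mutex);
        old_pwq = numa_pwq_tbl_install(wq, node, pwq);
        mutex_unlock(&wq->mutex);

        /* pair 3: pool->lock inside put_pwq_unlocked(), dropping the old ref */
        put_pwq_unlocked(old_pwq);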
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6426d6e..ed228ef64 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -295,7 +295,7 @@ module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
 static bool wq_numa_enabled;            /* unbound NUMA affinity enabled */
 
-/* PL: buf for apply_wqattrs_prepare() and wq_update_unbound_numa() */
+/* PL: buf for get_node_unbound_pwq() */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);     /* protects pools and workqueues list */
@@ -3458,32 +3458,45 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
 }
 
 /**
- * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
- * @attrs: the wq_attrs of the default pwq of the target workqueue
+ * get_node_unbound_pwq - get a pwq for the specified node
+ * @wq: the target workqueue
+ * @numa: whether NUMA affinity is requested for @wq
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
- * @cpumask: outarg, the resulting cpumask
+ * @dfl_pwq: the allocated default pwq
 *
- * Calculate the cpumask a workqueue with @attrs should use on @node.  If
- * @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation.  The result is stored in @cpumask.
+ * Allocate or reuse a pwq with the cpumask that @wq should use on @node.
 *
- * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
- * enabled and @node has online CPUs requested by @attrs, the returned
- * cpumask is the intersection of the possible CPUs of @node and
- * @attrs->cpumask.
+ * If NUMA affinity is not enabled, @dfl_pwq is always used.  If it is
+ * enabled and @node has online CPUs requested by @dfl_pwq->pool->attrs,
+ * the cpumask is the intersection of the possible CPUs of @node and
+ * the cpumask of @dfl_pwq->pool->attrs.  If @cpu_going_down is >= 0,
+ * that CPU is considered offline during the calculation.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
- * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
- * %false if equal.
+ * Return: a valid pwq; it may be @dfl_pwq under the conditions above,
+ * or the current pwq of @node.
+ * Returns NULL when the pwq allocation failed.
 */
-static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
-                                 int cpu_going_down, cpumask_t *cpumask)
+static struct pool_workqueue *
+get_node_unbound_pwq(struct workqueue_struct *wq, bool numa, int node,
+                     int cpu_going_down, struct pool_workqueue *dfl_pwq)
 {
-       if (!wq_numa_enabled || attrs->no_numa)
-               goto use_dfl;
+       struct pool_workqueue *pwq = unbound_pwq_by_node(wq, node);
+       const struct workqueue_attrs *attrs = dfl_pwq->pool->attrs;
+       struct workqueue_attrs *tmp_attrs = wq_update_unbound_numa_attrs_buf;
+       cpumask_t *cpumask;
+
+       lockdep_assert_held(&wq_pool_mutex);
+
+       if (!wq_numa_enabled || !numa)
+               return get_pwq_unlocked(dfl_pwq);
+
+       copy_workqueue_attrs(tmp_attrs, attrs);
+       cpumask = tmp_attrs->cpumask;
 
        /* does @node have any online CPUs @attrs wants? */
        cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
@@ -3491,15 +3504,18 @@ static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
                cpumask_clear_cpu(cpu_going_down, cpumask);
 
        if (cpumask_empty(cpumask))
-               goto use_dfl;
+               return get_pwq_unlocked(dfl_pwq);
 
-       /* yeap, return possible CPUs in @node that @attrs wants */
+       /* yeap, use possible CPUs in @node that @attrs wants */
        cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
-       return !cpumask_equal(cpumask, attrs->cpumask);
+       if (cpumask_equal(cpumask, attrs->cpumask))
+               return get_pwq_unlocked(dfl_pwq);
 
-use_dfl:
-       cpumask_copy(cpumask, attrs->cpumask);
-       return false;
+       /* try to reuse the current pwq */
+       if (pwq && wqattrs_equal(tmp_attrs, pwq->pool->attrs))
+               return get_pwq_unlocked(pwq);
+
+       return alloc_unbound_pwq(wq, tmp_attrs);
 }
 
 /* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
@@ -3551,7 +3567,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                      const struct workqueue_attrs *attrs)
 {
        struct apply_wqattrs_ctx *ctx;
-       struct workqueue_attrs *new_attrs, *tmp_attrs;
+       struct workqueue_attrs *new_attrs;
        struct pool_workqueue *pwq;
        int node;
 
@@ -3565,12 +3581,6 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                goto out_free;
 
        /*
-        * We don't need to alloc/free temporary attrs. Let's use a
-        * preallocated one. The following buf is protected by wq_pool_mutex.
-        */
-       tmp_attrs = wq_update_unbound_numa_attrs_buf;
-
-       /*
         * Calculate the attrs of the default pwq.
         * If the user configured cpumask doesn't overlap with the
         * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
@@ -3581,13 +3591,6 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
        cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
 
        /*
-        * We may create multiple pwqs with differing cpumasks.  Make a
-        * copy of @new_attrs which will be modified and used to obtain
-        * pools.
-        */
-       copy_workqueue_attrs(tmp_attrs, new_attrs);
-
-       /*
         * If something goes wrong during CPU up/down, we'll fall back to
         * the default pwq covering whole @attrs->cpumask.  Always create
         * it even if we don't use it immediately.
@@ -3600,18 +3603,11 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                goto out_free;
 
        for_each_node(node) {
-               if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
-                       pwq = unbound_pwq_by_node(wq, node);
-                       if (pwq && wqattrs_equal(tmp_attrs, pwq->pool->attrs))
-                               pwq = get_pwq_unlocked(pwq);
-                       else
-                               pwq = alloc_unbound_pwq(wq, tmp_attrs);
-                       if (!pwq)
-                               goto out_free;
-                       ctx->pwq_tbl[node] = pwq;
-               } else {
-                       ctx->pwq_tbl[node] = get_pwq_unlocked(ctx->dfl_pwq);
-               }
+               pwq = get_node_unbound_pwq(wq, !attrs->no_numa, node, -1,
+                                          ctx->dfl_pwq);
+               if (!pwq)
+                       goto out_free;
+               ctx->pwq_tbl[node] = pwq;
        }
 
        /* save the user configured attrs and sanitize it. */
@@ -3731,9 +3727,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
 {
        int node = cpu_to_node(cpu);
        int cpu_off = online ? -1 : cpu;
-       struct pool_workqueue *old_pwq = NULL, *pwq;
-       struct workqueue_attrs *target_attrs;
-       cpumask_t *cpumask;
+       struct pool_workqueue *old_pwq, *pwq;
 
        lockdep_assert_held(&wq_pool_mutex);
 
@@ -3741,40 +3735,14 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
            wq->unbound_attrs->no_numa)
                return;
 
-       /*
-        * We don't wanna alloc/free wq_attrs for each wq for each CPU.
-        * Let's use a preallocated one.  The following buf is protected by
-        * wq_pool_mutex.
-        */
-       target_attrs = wq_update_unbound_numa_attrs_buf;
-       cpumask = target_attrs->cpumask;
-
-       copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
-
-       /*
-        * Let's determine what needs to be done.  If the target cpumask is
-        * different from the default pwq's, we need to compare it to @pwq's
-        * and create a new one if they don't match.  If the target cpumask
-        * equals the default pwq's, the default pwq should be used.
-        */
-       if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
-               pwq = unbound_pwq_by_node(wq, node);
-               if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
-                       return;
-       } else {
-               pwq = get_pwq_unlocked(wq->dfl_pwq);
-               goto install;
-       }
-
        /* create a new pwq */
-       pwq = alloc_unbound_pwq(wq, target_attrs);
+       pwq = get_node_unbound_pwq(wq, true, node, cpu_off, wq->dfl_pwq);
        if (!pwq) {
                pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
                        wq->name);
                pwq = get_pwq_unlocked(wq->dfl_pwq);
        }
 
-install:
        /* Install the new pwq. */
        mutex_lock(&wq->mutex);
        old_pwq = numa_pwq_tbl_install(wq, node, pwq);
-- 
2.1.0