Date: Fri, 31 Mar 2017 13:23:58 +0200
From: Peter Zijlstra
To: Paul Turner
Cc: Yuyang Du, Ingo Molnar, LKML, Benjamin Segall, Morten Rasmussen,
    Vincent Guittot, Dietmar Eggemann, Matt Fleming, Mike Galbraith
Subject: Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()
Message-ID: <20170331112358.422kb5aosbq3k5tn@hirez.programming.kicks-ass.net>

On Fri, Mar 31, 2017 at 02:58:57AM -0700, Paul Turner wrote:
> > So let's pull it out again -- but I don't think we need to undo all of
> > Yuyang's patches for that. So please, look at the patch I proposed for
> > the problem you spotted. Let's fix the current state and take it from
> > there, ok?
>
> Oh, absolutely. I'm really not proposing re-vulcanizing any rubber
> here. Just saying that this particular problem is just one facet of
> the weight mixing. 100% agree on fixing this as is and iterating. I
> was actually thinking that these changes (which really nicely
> simplify the calculation) greatly simplify restoring the separation
> there.

So on top of the previous patch, I've arrived (without testing, and
without thinking much about numerical overflow) at the following patch.

It's known to be broken for things like update_cfs_rq_load_avg(), where
doing this will unconditionally flatten load_sum, which seems undesired.
Need to think more on this.

Also, I've pulled out the cpufreq scaling, which I'm not sure is the
right thing to do. Vincent wants to scale time instead, which would be
persistent in the history, unlike the scaling done here.

But yes, this looks doable and simpler than before.

---
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -313,7 +313,7 @@ struct load_weight {
  */
 struct sched_avg {
 	u64				last_update_time;
-	u64				load_sum;
+	u32				load_sum;
 	u32				util_sum;
 	u32				period_contrib;
 	unsigned long			load_avg;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -747,8 +747,8 @@ void init_entity_runnable_average(struct
 	 * nothing has been attached to the task group yet.
 	 */
 	if (entity_is_task(se))
-		sa->load_avg = scale_load_down(se->load.weight);
-	sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
+		sa->load_avg = se->load.weight;
+	sa->load_sum = LOAD_AVG_MAX;
 	/*
 	 * At this point, util_avg won't be used in select_task_rq_fair anyway
 	 */
@@ -2820,15 +2820,11 @@ static u32 __accumulate_pelt_segments(u6
  */
 static __always_inline u32
 accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
-	       unsigned long weight, int running, struct cfs_rq *cfs_rq)
+	       bool load, bool running, struct cfs_rq *cfs_rq)
 {
-	unsigned long scale_freq, scale_cpu;
-	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
+	u32 contrib = (u32)delta;
 	u64 periods;
 
-	scale_freq = arch_scale_freq_capacity(NULL, cpu);
-	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
-
 	delta += sa->period_contrib;
 	periods = delta / 1024; /* A period is 1024us (~1ms) */
 
@@ -2852,14 +2848,13 @@ accumulate_sum(u64 delta, int cpu, struc
 	}
 	sa->period_contrib = delta;
 
-	contrib = cap_scale(contrib, scale_freq);
-	if (weight) {
-		sa->load_sum += weight * contrib;
+	if (load) {
+		sa->load_sum += contrib;
 		if (cfs_rq)
-			cfs_rq->runnable_load_sum += weight * contrib;
+			cfs_rq->runnable_load_sum += contrib;
 	}
 	if (running)
-		sa->util_sum += contrib * scale_cpu;
+		sa->util_sum += contrib;
 
 	return periods;
 }
@@ -2894,9 +2889,10 @@ accumulate_sum(u64 delta, int cpu, struc
  */
 static __always_inline int
 ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
-		  unsigned long weight, int running, struct cfs_rq *cfs_rq)
+		  unsigned long weight, bool running, struct cfs_rq *cfs_rq)
 {
-	u64 delta;
+	unsigned long scale_freq, scale_cpu;
+	u64 delta, load;
 
 	delta = now - sa->last_update_time;
 	/*
@@ -2924,18 +2920,22 @@ ___update_load_avg(u64 now, int cpu, str
 	 * Step 1: accumulate *_sum since last_update_time. If we haven't
 	 * crossed period boundaries, finish.
 	 */
-	if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
+	if (!accumulate_sum(delta, cpu, sa, !!weight, running, cfs_rq))
 		return 0;
 
+	scale_freq = arch_scale_freq_capacity(NULL, cpu);
+	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+
 	/*
 	 * Step 2: update *_avg.
 	 */
-	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+	load = cap_scale(weight, scale_freq);
+	sa->load_avg = div_u64(load * sa->load_sum, LOAD_AVG_MAX);
 	if (cfs_rq) {
 		cfs_rq->runnable_load_avg =
-			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
+			div_u64(load * cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
 	}
-	sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+	sa->util_avg = (cap_scale(scale_cpu, scale_freq) * sa->util_sum) / LOAD_AVG_MAX;
 
 	return 1;
 }
@@ -2950,7 +2950,7 @@ static int
 __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	return ___update_load_avg(now, cpu, &se->avg,
-				  se->on_rq * scale_load_down(se->load.weight),
+				  se->on_rq * se->load.weight,
 				  cfs_rq->curr == se, NULL);
 }
 
@@ -2958,8 +2958,7 @@ static int
 __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
 {
 	return ___update_load_avg(now, cpu, &cfs_rq->avg,
-			scale_load_down(cfs_rq->load.weight),
-			cfs_rq->curr != NULL, cfs_rq);
+			cfs_rq->load.weight, cfs_rq->curr != NULL, cfs_rq);
 }
 
 /*
@@ -3130,11 +3129,11 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq
 
 	/* Set new sched_entity's load */
 	se->avg.load_avg = load;
-	se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
+	se->avg.load_sum = LOAD_AVG_MAX;
 
 	/* Update parent cfs_rq load */
 	add_positive(&cfs_rq->avg.load_avg, delta);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
+	cfs_rq->avg.load_sum = LOAD_AVG_MAX;
 
 	/*
 	 * If the sched_entity is already enqueued, we also have to update the
@@ -3143,7 +3142,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq
 	if (se->on_rq) {
 		/* Update parent cfs_rq runnable_load_avg */
 		add_positive(&cfs_rq->runnable_load_avg, delta);
-		cfs_rq->runnable_load_sum = cfs_rq->runnable_load_avg * LOAD_AVG_MAX;
+		cfs_rq->runnable_load_sum = LOAD_AVG_MAX;
 	}
 }
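
For illustration only (this is not part of the patch; the names and the
4008/4096 fixed-point approximation of y are made up, only y^32 = 0.5 and
LOAD_AVG_MAX mirror PELT), here is a small userspace sketch of the idea of
keeping the sum weight-free and folding the weight in only when the average
is computed:

#include <stdio.h>
#include <stdint.h>

#define PERIOD_US	1024	/* one PELT period, ~1ms */
#define LOAD_AVG_MAX	47742	/* ~ 1024 / (1 - y), maximum of the series */

/* decay by y per elapsed period, with y^32 = 0.5 approximated as 4008/4096 */
static uint64_t decay(uint64_t sum, int periods)
{
	while (periods--)
		sum = sum * 4008 >> 12;
	return sum;
}

int main(void)
{
	uint64_t weighted_sum = 0;	/* old scheme: weight folded into the sum */
	uint64_t time_sum = 0;		/* new scheme: pure (capped) runnable time */
	unsigned long weight = 1024;	/* constant entity weight */
	int p;

	/* 200 periods, runnable for half of each 1024us period */
	for (p = 0; p < 200; p++) {
		weighted_sum = decay(weighted_sum, 1) + weight * (PERIOD_US / 2);
		time_sum     = decay(time_sum, 1) + PERIOD_US / 2;
	}

	/* weight applied inside the sum vs. only at average time */
	printf("old load_avg: %llu\n",
	       (unsigned long long)(weighted_sum / LOAD_AVG_MAX));
	printf("new load_avg: %llu\n",
	       (unsigned long long)(weight * time_sum / LOAD_AVG_MAX));

	return 0;
}

With a constant weight the two schemes agree up to fixed-point rounding; the
difference only shows up when the weight changes, where the weight-free sum
applies the new weight to the whole decayed history at the next update
instead of carrying the old weight inside it -- which is the separation
being discussed above.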