Date: Thu, 20 Aug 2015 14:26:08 +0900
From: Byungchul Park <byungchul.park@lge.com>
To: Peter Zijlstra
Cc: mingo@kernel.org, linux-kernel@vger.kernel.org, yuyang.du@intel.com
Subject: Re: [PATCH v3 0/5] sync a se with its cfs_rq when att(det)aching it
Message-ID: <20150820052608.GF24261@byungchulpark-X58A-UD3R>
References: <1439966836-11266-1-git-send-email-byungchul.park@lge.com>
 <20150820011721.GA3138@worktop.event.rightround.com>
In-Reply-To: <20150820011721.GA3138@worktop.event.rightround.com>

On Thu, Aug 20, 2015 at 03:17:21AM +0200, Peter Zijlstra wrote:
> 
> I did something like this on top.. please have a look at the XXX and
> integrate.

Yes, I will do it.

> 
> ---
> 
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -2664,8 +2664,8 @@ static inline u64 cfs_rq_clock_task(stru
>  /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
>  static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
>  {
> -	int decayed;
>  	struct sched_avg *sa = &cfs_rq->avg;
> +	int decayed;
> 
>  	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
>  		long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
> @@ -2695,15 +2695,16 @@ static inline int update_cfs_rq_load_avg
>  static inline void update_load_avg(struct sched_entity *se, int update_tg)
>  {
>  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
> -	int cpu = cpu_of(rq_of(cfs_rq));
>  	u64 now = cfs_rq_clock_task(cfs_rq);
> +	int cpu = cpu_of(rq_of(cfs_rq));
> 
>  	/*
>  	 * Track task load average for carrying it to new CPU after migrated, and
>  	 * track group sched_entity load average for task_h_load calc in migration
>  	 */
>  	__update_load_avg(now, cpu, &se->avg,
> -		se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
> +			  se->on_rq * scale_load_down(se->load.weight),
> +			  cfs_rq->curr == se, NULL);
> 
>  	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
>  		update_tg_load_avg(cfs_rq, 0);
> @@ -2718,9 +2719,10 @@ static void attach_entity_load_avg(struc
>  	 * update here. we have to update it with prev cfs_rq just before changing
>  	 * se's cfs_rq, and get here soon.
>  	 */
> -	if (se->avg.last_update_time)
> +	if (se->avg.last_update_time) {
>  		__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
> -				&se->avg, 0, 0, NULL);
> +				  &se->avg, 0, 0, NULL);
> +	}
> 
>  	se->avg.last_update_time = cfs_rq->avg.last_update_time;
>  	cfs_rq->avg.load_avg += se->avg.load_avg;
> @@ -2732,17 +2734,13 @@ static void attach_entity_load_avg(struc
>  static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  {
>  	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
> -			&se->avg, se->on_rq * scale_load_down(se->load.weight),
> -			cfs_rq->curr == se, NULL);
> +			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
> +			  cfs_rq->curr == se, NULL);
> 
> -	cfs_rq->avg.load_avg =
> -		max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
> -	cfs_rq->avg.load_sum =
> -		max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
> -	cfs_rq->avg.util_avg =
> -		max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
> -	cfs_rq->avg.util_sum =
> -		max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
> +	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
> +	cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
> +	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
> +	cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
>  }
> 
>  /* Add the load generated by se into cfs_rq's load average */
> @@ -2751,14 +2749,14 @@ enqueue_entity_load_avg(struct cfs_rq *c
>  {
>  	struct sched_avg *sa = &se->avg;
>  	u64 now = cfs_rq_clock_task(cfs_rq);
> -	int migrated = 0, decayed;
> +	int migrated, decayed;
> 
> -	if (sa->last_update_time == 0)
> -		migrated = 1;
> -	else
> +	migrated = !sa->last_update_time;
> +	if (!migrated) {
>  		__update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
>  			se->on_rq * scale_load_down(se->load.weight),
>  			cfs_rq->curr == se, NULL);
> +	}
> 
>  	decayed = update_cfs_rq_load_avg(now, cfs_rq);
> 
> @@ -2781,7 +2779,7 @@ dequeue_entity_load_avg(struct cfs_rq *c
>  	cfs_rq->runnable_load_avg =
>  		max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
>  	cfs_rq->runnable_load_sum =
> -		max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
> +		max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
>  }
> 
>  /*
> @@ -2849,6 +2847,11 @@ static inline void
>  dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
>  static inline void remove_entity_load_avg(struct sched_entity *se) {}
> 
> +static inline void
> +attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
> +static inline void
> +detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
> +
>  static inline int idle_balance(struct rq *rq)
>  {
>  	return 0;
> @@ -7915,29 +7918,50 @@ prio_changed_fair(struct rq *rq, struct
>  		check_preempt_curr(rq, p, 0);
>  }
> 
> -static void detach_task_cfs_rq(struct task_struct *p, int queued)
> +static inline bool vruntime_normalized(struct task_struct *p)
>  {
> +	int queued = task_on_rq_queued(p);
>  	struct sched_entity *se = &p->se;
> -	struct cfs_rq *cfs_rq = cfs_rq_of(se);
> 
>  	/*
> -	 * Ensure the task's vruntime is normalized, so that after attaching
> -	 * back it to a cfs_rq the enqueue_entity() will do the right thing.
> -	 *
>  	 * If it's queued, then the dequeue_entity(.flags=0) will already
> -	 * have normalized the vruntime, if it's !queued, then only when
> -	 * the task is sleeping it still has a non-normalized vruntime.
> -	 *
> +	 * have normalized the vruntime.
> +	 */
> +	if (queued)
> +		return true;
> +
> +	/*
>  	 * When !queued, vruntime of the task has usually NOT been normalized.
>  	 * But there are some cases where it has already been normalized:
> -	 *
> -	 * - Moving a forked child which is waiting for being woken up by
> +	 */
> +
> +	/*
> +	 * - a forked child which is waiting for being woken up by
>  	 *   wake_up_new_task().
> -	 * - Moving a task which has been woken up by try_to_wake_up() and
> +	 */
> +	if (!se->sum_exec_runtime || p->state == TASK_WAKING)
> +		return true;
> +
> +	/*
> +	 * - a task which has been woken up by try_to_wake_up() and
>  	 *   waiting for actually being woken up by sched_ttwu_pending().
> +	 *
> +	 * XXX this appears wrong!! check history,
> +	 * we appear to always set queued and RUNNING under the same lock instance
> +	 * might be from before TASK_WAKING ?
>  	 */
> -	if (!queued && p->state != TASK_RUNNING &&
> -	    se->sum_exec_runtime && p->state != TASK_WAKING) {
> +	if (p->state == TASK_RUNNING)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void detach_task_cfs_rq(struct task_struct *p)
> +{
> +	struct sched_entity *se = &p->se;
> +	struct cfs_rq *cfs_rq = cfs_rq_of(se);
> +
> +	if (!vruntime_normalized(p)) {
>  		/*
>  		 * Fix up our vruntime so that the current sleep doesn't
>  		 * cause 'unlimited' sleep bonus.
> @@ -7946,10 +7970,8 @@ static void detach_task_cfs_rq(struct ta
>  		se->vruntime -= cfs_rq->min_vruntime;
>  	}
> 
> -#ifdef CONFIG_SMP
>  	/* Catch up with the cfs_rq and remove our load when we leave */
>  	detach_entity_load_avg(cfs_rq, se);
> -#endif
>  }
> 
>  static void attach_task_cfs_rq(struct task_struct *p, int queued)
> @@ -7965,49 +7987,23 @@ static void attach_task_cfs_rq(struct ta
>  	se->depth = se->parent ? se->parent->depth + 1 : 0;
>  #endif
> 
> -#ifdef CONFIG_SMP
>  	/* synchronize task with its cfs_rq */
>  	attach_entity_load_avg(cfs_rq, se);
> -#endif
> 
> -	/*
> -	 * Ensure the task has a non-normalized vruntime when attaching it
> -	 * to a cfs_rq, so that enqueue_entity() at wake-up time will do
> -	 * the right thing.
> -	 *
> -	 * If it's queued, then the enqueue_entity(.flags=0) makes the task
> -	 * has non-normalized vruntime, if it's !queued, then it still has
> -	 * a normalized vruntime.
> -	 *
> -	 * When !queued, a task usually should have a non-normalized vruntime
> -	 * after being attached to a cfs_rq. But there are some cases where
> -	 * we should keep it normalized:
> -	 *
> -	 * - Moving a forked child which is waiting for being woken up by
> -	 *   wake_up_new_task().
> -	 * - Moving a task which has been woken up by try_to_wake_up() and
> -	 *   waiting for actually being woken up by sched_ttwu_pending().
> -	 */
> -	if (!queued && p->state != TASK_RUNNING &&
> -	    se->sum_exec_runtime && p->state != TASK_WAKING)
> +	if (!vruntime_normalized(p))
>  		se->vruntime += cfs_rq->min_vruntime;
>  }
> 
>  static void switched_from_fair(struct rq *rq, struct task_struct *p)
>  {
> -	detach_task_cfs_rq(p, task_on_rq_queued(p));
> +	detach_task_cfs_rq(p);
>  }
> 
> -/*
> - * We switched to the sched_fair class.
> - */
>  static void switched_to_fair(struct rq *rq, struct task_struct *p)
>  {
> -	int queued = task_on_rq_queued(p);
> -
> -	attach_task_cfs_rq(p, queued);
> +	attach_task_cfs_rq(p);
> 
> -	if (queued) {
> +	if (task_on_rq_queued(p)) {
>  		/*
>  		 * We were most likely switched from sched_rt, so
>  		 * kick off the schedule if running, otherwise just see
> @@ -8056,8 +8052,7 @@ static void task_move_group_fair(struct
>  {
>  	detach_task_cfs_rq(p, queued);
>  	set_task_rq(p, task_cpu(p));
> -
> -	/* tell se's cfs_rq has been changed */
> +	/* tell se's cfs_rq has been changed -- migrated */
>  	p->se.avg.last_update_time = 0;
>  	attach_task_cfs_rq(p, queued);
>  }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
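
[Editor's sketch] As a standalone illustration of the helper introduced in the
diff above, here is a minimal userspace sketch of the decision order taken by
vruntime_normalized(). The mock_task type, its fields, and the MOCK_* states
are simplified stand-ins invented for this sketch (they are not the kernel's
task_struct, sched_entity, or task state flags), so treat it only as a model of
the check order, not as kernel code.

	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified stand-ins for the kernel's task states (illustration only). */
	enum mock_state { MOCK_RUNNING, MOCK_WAKING, MOCK_SLEEPING };

	/* Simplified stand-in for task_struct + its sched_entity (illustration only). */
	struct mock_task {
		bool queued;				/* task_on_rq_queued(p) */
		unsigned long long sum_exec_runtime;	/* se->sum_exec_runtime */
		enum mock_state state;			/* p->state */
	};

	/* Mirrors the decision order of vruntime_normalized() in the diff above. */
	static bool mock_vruntime_normalized(const struct mock_task *p)
	{
		/* queued: dequeue_entity(.flags=0) has already normalized it */
		if (p->queued)
			return true;

		/* a forked child waiting for wake_up_new_task(), or a task mid-wakeup */
		if (!p->sum_exec_runtime || p->state == MOCK_WAKING)
			return true;

		/* woken by try_to_wake_up(), waiting for sched_ttwu_pending();
		 * this is the case the XXX comment in the diff asks to re-check */
		if (p->state == MOCK_RUNNING)
			return true;

		return false;
	}

	int main(void)
	{
		struct mock_task sleeper = { false, 12345ULL, MOCK_SLEEPING };
		struct mock_task forked  = { false, 0ULL,     MOCK_SLEEPING };

		/* Only the genuinely sleeping task reports a non-normalized vruntime,
		 * so only it would get the min_vruntime adjustment on detach/attach. */
		printf("sleeper normalized: %d\n", mock_vruntime_normalized(&sleeper)); /* 0 */
		printf("forked  normalized: %d\n", mock_vruntime_normalized(&forked));  /* 1 */
		return 0;
	}

The ordering matters: the queued check covers the common case, the second check
covers never-run children and TASK_WAKING tasks, and the final TASK_RUNNING
check encodes the remote-wakeup situation that the XXX comment in the diff asks
to re-verify against the wakeup locking history.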