2023-03-28 11:10:41

by Peter Zijlstra

Subject: [PATCH 06/17] sched/fair: Add lag based placement

With the introduction of avg_vruntime, it is possible to approximate
lag (the entire purpose of introducing it in fact). Use this to do lag
based placement over sleep+wake.

Specifically, the FAIR_SLEEPERS logic places entities too far to the
left and messes up the deadline aspect of EEVDF.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
 include/linux/sched.h   |    1
 kernel/sched/core.c     |    1
 kernel/sched/fair.c     |  129 ++++++++++++++++++++++++++++++++++--------------
 kernel/sched/features.h |    8 ++
 4 files changed, 104 insertions(+), 35 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -555,6 +555,7 @@ struct sched_entity {
u64 sum_exec_runtime;
u64 vruntime;
u64 prev_sum_exec_runtime;
+ s64 vlag;

u64 nr_migrations;

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4439,6 +4439,7 @@ static void __sched_fork(unsigned long c
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
+ p->se.vlag = 0;
INIT_LIST_HEAD(&p->se.group_node);

set_latency_offset(p);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -689,6 +689,15 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
return cfs_rq->min_vruntime + avg;
}

+/*
+ * lag_i = S - s_i = w_i * (V - v_i)
+ */
+void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ SCHED_WARN_ON(!se->on_rq);
+ se->vlag = avg_vruntime(cfs_rq) - se->vruntime;
+}
+
static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
{
u64 min_vruntime = cfs_rq->min_vruntime;
@@ -3417,6 +3426,8 @@ dequeue_load_avg(struct cfs_rq *cfs_rq,
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
+ unsigned long old_weight = se->load.weight;
+
if (se->on_rq) {
/* commit outstanding execution time */
if (cfs_rq->curr == se)
@@ -3429,6 +3440,14 @@ static void reweight_entity(struct cfs_r

update_load_set(&se->load, weight);

+ if (!se->on_rq) {
+ /*
+ * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
+ * we need to scale se->vlag when w_i changes.
+ */
+ se->vlag = div_s64(se->vlag * old_weight, weight);
+ }
+
#ifdef CONFIG_SMP
do {
u32 divider = get_pelt_divider(&se->avg);
@@ -4778,49 +4797,86 @@ static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
u64 vruntime = avg_vruntime(cfs_rq);
+ s64 lag = 0;

- /* sleeps up to a single latency don't count. */
- if (!initial) {
- unsigned long thresh;
+ /*
+ * Due to how V is constructed as the weighted average of entities,
+ * adding tasks with positive lag, or removing tasks with negative lag
+ * will move 'time' backwards, this can screw around with the lag of
+ * other tasks.
+ *
+ * EEVDF: placement strategy #1 / #2
+ */
+ if (sched_feat(PLACE_LAG) && cfs_rq->nr_running > 1) {
+ struct sched_entity *curr = cfs_rq->curr;
+ unsigned long load;

- if (se_is_idle(se))
- thresh = sysctl_sched_min_granularity;
- else
- thresh = sysctl_sched_latency;
+ lag = se->vlag;

/*
- * Halve their sleep time's effect, to allow
- * for a gentler effect of sleepers:
+ * If we want to place a task and preserve lag, we have to
+ * consider the effect of the new entity on the weighted
+ * average and compensate for this, otherwise lag can quickly
+ * evaporate:
+ *
+ * l_i = V - v_i <=> v_i = V - l_i
+ *
+ * V = v_avg = W*v_avg / W
+ *
+ * V' = (W*v_avg + w_i*v_i) / (W + w_i)
+ * = (W*v_avg + w_i(v_avg - l_i)) / (W + w_i)
+ * = v_avg + w_i*l_i/(W + w_i)
+ *
+ * l_i' = V' - v_i = v_avg + w_i*l_i/(W + w_i) - (v_avg - l)
+ * = l_i - w_i*l_i/(W + w_i)
+ *
+ * l_i = (W + w_i) * l_i' / W
*/
- if (sched_feat(GENTLE_FAIR_SLEEPERS))
- thresh >>= 1;
+ load = cfs_rq->avg_load;
+ if (curr && curr->on_rq)
+ load += curr->load.weight;
+
+ lag *= load + se->load.weight;
+ if (WARN_ON_ONCE(!load))
+ load = 1;
+ lag = div_s64(lag, load);

- vruntime -= thresh;
+ vruntime -= lag;
}

- /*
- * Pull vruntime of the entity being placed to the base level of
- * cfs_rq, to prevent boosting it if placed backwards.
- * However, min_vruntime can advance much faster than real time, with
- * the extreme being when an entity with the minimal weight always runs
- * on the cfs_rq. If the waking entity slept for a long time, its
- * vruntime difference from min_vruntime may overflow s64 and their
- * comparison may get inversed, so ignore the entity's original
- * vruntime in that case.
- * The maximal vruntime speedup is given by the ratio of normal to
- * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
- * When placing a migrated waking entity, its exec_start has been set
- * from a different rq. In order to take into account a possible
- * divergence between new and prev rq's clocks task because of irq and
- * stolen time, we take an additional margin.
- * So, cutting off on the sleep time of
- * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
- * should be safe.
- */
- if (entity_is_long_sleeper(se))
- se->vruntime = vruntime;
- else
- se->vruntime = max_vruntime(se->vruntime, vruntime);
+ if (sched_feat(FAIR_SLEEPERS)) {
+
+ /* sleeps up to a single latency don't count. */
+ if (!initial) {
+ unsigned long thresh;
+
+ if (se_is_idle(se))
+ thresh = sysctl_sched_min_granularity;
+ else
+ thresh = sysctl_sched_latency;
+
+ /*
+ * Halve their sleep time's effect, to allow
+ * for a gentler effect of sleepers:
+ */
+ if (sched_feat(GENTLE_FAIR_SLEEPERS))
+ thresh >>= 1;
+
+ vruntime -= thresh;
+ }
+
+ /*
+ * Pull vruntime of the entity being placed to the base level of
+ * cfs_rq, to prevent boosting it if placed backwards. If the entity
+ * slept for a long time, don't even try to compare its vruntime with
+ * the base as it may be too far off and the comparison may get
+ * inversed due to s64 overflow.
+ */
+ if (!entity_is_long_sleeper(se))
+ vruntime = max_vruntime(se->vruntime, vruntime);
+ }
+
+ se->vruntime = vruntime;
}

static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -4991,6 +5047,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, st

clear_buddies(cfs_rq, se);

+ if (flags & DEQUEUE_SLEEP)
+ update_entity_lag(cfs_rq, se);
+
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,12 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
+
/*
* Only give sleepers 50% of their service deficit. This allows
* them to run sooner, but does not allow tons of sleepers to
* rip the spread apart.
*/
+SCHED_FEAT(FAIR_SLEEPERS, false)
SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)

/*
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
+ */
+SCHED_FEAT(PLACE_LAG, true)
+
+/*
* Prefer to schedule the task we woke last (assuming it failed
* wakeup-preemption), since its likely going to consume data we
* touched, increases cache locality.
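
To see what the placement scheme does outside the kernel, here is a small
userspace C sketch (assumptions: plain doubles instead of the kernel's
fixed-point s64 arithmetic, no min_vruntime offset or scale_load_down(),
a queue that does not advance while the task sleeps, and made-up weights
and vruntimes). It records an entity's lag at dequeue the way
update_entity_lag() does, inflates it by (W + w_i)/W at placement the way
place_entity() does, and checks that the lag after placement matches what
was recorded:

  #include <assert.h>
  #include <math.h>
  #include <stdio.h>

  /* Toy model only: not kernel code. */
  struct entity {
          double weight;
          double vruntime;
          double vlag;
  };

  /* V = (\Sum w_j*v_j) / W over n entities */
  static double avg_vruntime(const struct entity *q, int n)
  {
          double sum = 0.0, W = 0.0;
          int i;

          for (i = 0; i < n; i++) {
                  sum += q[i].weight * q[i].vruntime;
                  W   += q[i].weight;
          }
          return sum / W;
  }

  static double sum_weight(const struct entity *q, int n)
  {
          double W = 0.0;
          int i;

          for (i = 0; i < n; i++)
                  W += q[i].weight;
          return W;
  }

  int main(void)
  {
          /* three entities that stay runnable throughout */
          struct entity q[3] = {
                  { 1024, 100.0, 0.0 },
                  { 2048,  90.0, 0.0 },
                  { 1024, 120.0, 0.0 },
          };
          struct entity p = { 1024, 95.0, 0.0 };  /* the task that sleeps */
          struct entity all[4] = { q[0], q[1], q[2], p };
          double V, W, lag, new_lag;

          /* dequeue (sleep): record lag against V, with p still counted */
          p.vlag = avg_vruntime(all, 4) - p.vruntime;

          /* enqueue (wake): inflate by (W + w_i)/W, place v_i = V - lag */
          V = avg_vruntime(q, 3);
          W = sum_weight(q, 3);
          lag = p.vlag * (W + p.weight) / W;
          p.vruntime = V - lag;

          /* lag after placement equals the lag recorded at dequeue */
          all[3] = p;
          new_lag = avg_vruntime(all, 4) - p.vruntime;
          printf("recorded lag %.3f, lag after placement %.3f\n",
                 p.vlag, new_lag);
          assert(fabs(new_lag - p.vlag) < 1e-9);
          return 0;
  }

Dropping the (W + w_i)/W inflation makes the final lag come out smaller
than the recorded one, which is the "lag can quickly evaporate" effect
the comment in place_entity() refers to.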


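The reweight_entity() rescaling above can be checked the same way with
made-up numbers: se->vlag stores the weight-less quantity V - v_i, while
the real lag is lag_i = w_i*(V - v_i), so the stored value must shrink
when the weight grows for the real lag to stay constant:

  old_weight = 1024, se->vlag = 600  =>  lag = 1024 * 600 = 614400

  after reweight to weight = 2048:

  se->vlag' = se->vlag * old_weight / weight = 600 * 1024 / 2048 = 300
  lag'      = 2048 * 300 = 614400 == lag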

2023-04-03 09:21:03

by Chen Yu

Subject: Re: [PATCH 06/17] sched/fair: Add lag based placement

On 2023-03-28 at 11:26:28 +0200, Peter Zijlstra wrote:
> place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
[...]
> /*
> - * Halve their sleep time's effect, to allow
> - * for a gentler effect of sleepers:
> + * If we want to place a task and preserve lag, we have to
> + * consider the effect of the new entity on the weighted
> + * average and compensate for this, otherwise lag can quickly
> + * evaporate:
> + *
> + * l_i = V - v_i <=> v_i = V - l_i
> + *
> + * V = v_avg = W*v_avg / W
> + *
> + * V' = (W*v_avg + w_i*v_i) / (W + w_i)
If I understand correctly, V' means the avg_vruntime if se_i is enqueued?
Then,

V = (\Sum w_j*v_j) / W

V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)

Not sure how W*v_avg equals Sum w_j*v_j?

> + * = (W*v_avg + w_i(v_avg - l_i)) / (W + w_i)
> + * = v_avg + w_i*l_i/(W + w_i)
v_avg - w_i*l_i/(W + w_i) ?
> + *
> + * l_i' = V' - v_i = v_avg + w_i*l_i/(W + w_i) - (v_avg - l)
> + * = l_i - w_i*l_i/(W + w_i)
> + *
> + * l_i = (W + w_i) * l_i' / W
> */
[...]
> - if (sched_feat(GENTLE_FAIR_SLEEPERS))
> - thresh >>= 1;
> + load = cfs_rq->avg_load;
> + if (curr && curr->on_rq)
> + load += curr->load.weight;
> +
> + lag *= load + se->load.weight;
> + if (WARN_ON_ONCE(!load))
> + load = 1;
> + lag = div_s64(lag, load);
>
Should we calculate
l_i' = l_i * w / (W + w_i) instead of calculating l_i above? I thought we want to adjust
the lag (before enqueue) based on the new weight (after enqueue).


[I will start to run some benchmarks today.]

thanks,
Chenyu

2023-04-05 09:56:47

by Peter Zijlstra

Subject: Re: [PATCH 06/17] sched/fair: Add lag based placement

On Mon, Apr 03, 2023 at 05:18:06PM +0800, Chen Yu wrote:
> On 2023-03-28 at 11:26:28 +0200, Peter Zijlstra wrote:
> > place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
> [...]
> > /*
> > - * Halve their sleep time's effect, to allow
> > - * for a gentler effect of sleepers:
> > + * If we want to place a task and preserve lag, we have to
> > + * consider the effect of the new entity on the weighted
> > + * average and compensate for this, otherwise lag can quickly
> > + * evaporate:
> > + *
> > + * l_i = V - v_i <=> v_i = V - l_i
> > + *
> > + * V = v_avg = W*v_avg / W
> > + *
> > + * V' = (W*v_avg + w_i*v_i) / (W + w_i)
> If I understand correctly, V' means the avg_vruntime if se_i is enqueued?
> Then,
>
> V = (\Sum w_j*v_j) / W

multiply by W on both sides to get:

V*W = \Sum w_j*v_j

> V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)
>
> Not sure how W*v_avg equals Sum w_j*v_j?

V := v_avg

(yeah, I should clean up this stuff, already said to Josh I would)

> > + * = (W*v_avg + w_i(v_avg - l_i)) / (W + w_i)
> > + * = v_avg + w_i*l_i/(W + w_i)
> v_avg - w_i*l_i/(W + w_i) ?

Yup -- seems typing is hard :-)

> > + *
> > + * l_i' = V' - v_i = v_avg + w_i*l_i/(W + w_i) - (v_avg - l)
> > + * = l_i - w_i*l_i/(W + w_i)
> > + *
> > + * l_i = (W + w_i) * l_i' / W
> > */
> [...]
> > - if (sched_feat(GENTLE_FAIR_SLEEPERS))
> > - thresh >>= 1;
> > + load = cfs_rq->avg_load;
> > + if (curr && curr->on_rq)
> > + load += curr->load.weight;
> > +
> > + lag *= load + se->load.weight;
> > + if (WARN_ON_ONCE(!load))
> > + load = 1;
> > + lag = div_s64(lag, load);
> >
> Should we calculate
> l_i' = l_i * w / (W + w_i) instead of calculating l_i above? I thought we want to adjust
> the lag (before enqueue) based on the new weight (after enqueue).

We want to ensure the lag after placement is the lag we got before
dequeue.

I've updated the comment to read like so:

/*
* If we want to place a task and preserve lag, we have to
* consider the effect of the new entity on the weighted
* average and compensate for this, otherwise lag can quickly
* evaporate.
*
* Lag is defined as:
*
* l_i = V - v_i <=> v_i = V - l_i
*
* And we take V to be the weighted average of all v:
*
* V = (\Sum w_j*v_j) / W
*
* Where W is: \Sum w_j
*
* Then, the weighted average after adding an entity with lag
* l_i is given by:
*
* V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)
* = (W*V + w_i*(V - l_i)) / (W + w_i)
* = (W*V + w_i*V - w_i*l_i) / (W + w_i)
* = (V*(W + w_i) - w_i*l) / (W + w_i)
* = V - w_i*l_i / (W + w_i)
*
* And the actual lag after adding an entity with l_i is:
*
* l'_i = V' - v_i
* = V - w_i*l_i / (W + w_i) - (V - l_i)
* = l_i - w_i*l_i / (W + w_i)
*
* Which is strictly less than l_i. So in order to preserve lag
* we should inflate the lag before placement such that the
* effective lag after placement comes out right.
*
* As such, invert the above relation for l'_i to get the l_i
* we need to use such that the lag after placement is the lag
* we computed before dequeue.
*
* l'_i = l_i - w_i*l_i / (W + w_i)
* = ((W + w_i)*l_i - w_i*l_i) / (W + w_i)
*
* (W + w_i)*l'_i = (W + w_i)*l_i - w_i*l_i
* = W*l_i
*
* l_i = (W + w_i)*l'_i / W
*/
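
To make the inversion concrete, a quick check with made-up numbers:

  W = 3072, w_i = 1024, lag computed before dequeue l'_i = 300

  place with:  l_i = (W + w_i) * l'_i / W = 4096 * 300 / 3072 = 400

  check:       l'_i = l_i - w_i*l_i / (W + w_i)
                    = 400 - 1024*400/4096
                    = 400 - 100
                    = 300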

2023-04-06 03:08:07

by Chen Yu

Subject: Re: [PATCH 06/17] sched/fair: Add lag based placement

On 2023-04-05 at 11:47:20 +0200, Peter Zijlstra wrote:
> On Mon, Apr 03, 2023 at 05:18:06PM +0800, Chen Yu wrote:
> > On 2023-03-28 at 11:26:28 +0200, Peter Zijlstra wrote:
> > > place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
> > [...]
> > > /*
> > > - * Halve their sleep time's effect, to allow
> > > - * for a gentler effect of sleepers:
> > > + * If we want to place a task and preserve lag, we have to
> > > + * consider the effect of the new entity on the weighted
> > > + * average and compensate for this, otherwise lag can quickly
> > > + * evaporate:
> > > + *
> > > + * l_i = V - v_i <=> v_i = V - l_i
> > > + *
> > > + * V = v_avg = W*v_avg / W
> > > + *
> > > + * V' = (W*v_avg + w_i*v_i) / (W + w_i)
> > If I understand correctly, V' means the avg_vruntime if se_i is enqueued?
> > Then,
> >
> > V = (\Sum w_j*v_j) / W
>
> multiply by W on both sides to get:
>
> V*W = \Sum w_j*v_j
>
> > V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)
> >
> > Not sure how W*v_avg equals Sum w_j*v_j?
>
> V := v_avg
>
I see, thanks for the explanation.
> (yeah, I should clean up this stuff, already said to Josh I would)
>
> > > + * = (W*v_avg + w_i(v_avg - l_i)) / (W + w_i)
> > > + * = v_avg + w_i*l_i/(W + w_i)
> > v_avg - w_i*l_i/(W + w_i) ?
>
> Yup -- seems typing is hard :-)
>
> > > + *
> > > + * l_i' = V' - v_i = v_avg + w_i*l_i/(W + w_i) - (v_avg - l)
> > > + * = l_i - w_i*l_i/(W + w_i)
> > > + *
> > > + * l_i = (W + w_i) * l_i' / W
> > > */
> > [...]
> > > - if (sched_feat(GENTLE_FAIR_SLEEPERS))
> > > - thresh >>= 1;
> > > + load = cfs_rq->avg_load;
> > > + if (curr && curr->on_rq)
> > > + load += curr->load.weight;
> > > +
> > > + lag *= load + se->load.weight;
> > > + if (WARN_ON_ONCE(!load))
> > > + load = 1;
> > > + lag = div_s64(lag, load);
> > >
> > Should we calculate
> > l_i' = l_i * w / (W + w_i) instead of calculating l_i above? I thought we want to adjust
> > the lag (before enqueue) based on the new weight (after enqueue).
>
> We want to ensure the lag after placement is the lag we got before
> dequeue.
>
> I've updated the comment to read like so:
>
> /*
> * If we want to place a task and preserve lag, we have to
> * consider the effect of the new entity on the weighted
> * average and compensate for this, otherwise lag can quickly
> * evaporate.
> *
> * Lag is defined as:
> *
> * l_i = V - v_i <=> v_i = V - l_i
> *
> * And we take V to be the weighted average of all v:
> *
> * V = (\Sum w_j*v_j) / W
> *
> * Where W is: \Sum w_j
> *
> * Then, the weighted average after adding an entity with lag
> * l_i is given by:
> *
> * V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)
> * = (W*V + w_i*(V - l_i)) / (W + w_i)
> * = (W*V + w_i*V - w_i*l_i) / (W + w_i)
> * = (V*(W + w_i) - w_i*l) / (W + w_i)
small typo w_i*l -> w_i*l_i
> * = V - w_i*l_i / (W + w_i)
> *
> * And the actual lag after adding an entity with l_i is:
> *
> * l'_i = V' - v_i
> * = V - w_i*l_i / (W + w_i) - (V - l_i)
> * = l_i - w_i*l_i / (W + w_i)
> *
> * Which is strictly less than l_i. So in order to preserve lag
> * we should inflate the lag before placement such that the
> * effective lag after placement comes out right.
> *
> * As such, invert the above relation for l'_i to get the l_i
> * we need to use such that the lag after placement is the lag
> * we computed before dequeue.
> *
> * l'_i = l_i - w_i*l_i / (W + w_i)
> * = ((W + w_i)*l_i - w_i*l_i) / (W + w_i)
> *
> * (W + w_i)*l'_i = (W + w_i)*l_i - w_i*l_i
> * = W*l_i
> *
> * l_i = (W + w_i)*l'_i / W
> */
Got it, thanks! This is very clear.

thanks,
Chenyu

2023-04-13 15:59:58

by Chen Yu

Subject: Re: [PATCH 06/17] sched/fair: Add lag based placement

On 2023-04-05 at 11:47:20 +0200, Peter Zijlstra wrote:
> On Mon, Apr 03, 2023 at 05:18:06PM +0800, Chen Yu wrote:
> > On 2023-03-28 at 11:26:28 +0200, Peter Zijlstra wrote:
So I launched the tests on another platform with more CPUs:

baseline: 6.3-rc6

compare: sched/eevdf branch on top of commit 8c59a975d5ee ("sched/eevdf: Debug / validation crud")


--------------------------------------------------------------------------------------
schbench:mthreads = 2
worker_threads        baseline            eevdf+NO_PLACE_BONUS
25%                      80.00   +19.2%       95.40             schbench.latency_90%_us
                        (0.00%)              (0.51%)            stddev
50%                     183.70    +2.2%      187.80             schbench.latency_90%_us
                        (0.35%)              (0.46%)            stddev
75%                       4065   -21.4%        3193             schbench.latency_90%_us
                       (69.65%)              (3.42%)            stddev
100%                     13696   -92.4%        1040             schbench.latency_90%_us
                        (5.25%)             (69.03%)            stddev
125%                     16457   -78.6%        3514             schbench.latency_90%_us
                       (10.50%)              (6.25%)            stddev
150%                     31177   -77.5%        7008             schbench.latency_90%_us
                        (6.84%)              (5.19%)            stddev
175%                     40729   -75.1%       10160             schbench.latency_90%_us
                        (6.11%)              (2.53%)            stddev
200%                     52224   -74.4%       13385             schbench.latency_90%_us
                       (10.42%)              (1.72%)            stddev


worker_threads    eevdf+NO_PLACE_BONUS     eevdf+PLACE_BONUS
25%                      96.30    +0.2%       96.50             schbench.latency_90%_us
                        (0.66%)              (0.52%)            stddev
50%                     187.20    -3.0%      181.60             schbench.latency_90%_us
                        (0.21%)              (0.71%)            stddev
75%                       3034   -84.1%      482.50             schbench.latency_90%_us
                        (5.56%)             (27.40%)            stddev
100%                    648.20  +114.7%        1391             schbench.latency_90%_us
                       (64.70%)             (10.05%)            stddev
125%                      3506    -3.0%        3400             schbench.latency_90%_us
                        (2.79%)              (9.89%)            stddev
150%                      6793   +29.6%        8803             schbench.latency_90%_us
                        (1.39%)              (7.30%)            stddev
175%                      9961    +9.2%       10876             schbench.latency_90%_us
                        (1.51%)              (6.54%)            stddev
200%                     13660    +3.3%       14118             schbench.latency_90%_us
                        (1.38%)              (6.02%)            stddev



Summary for schbench: in most cases eevdf+NO_PLACE_BONUS gives the best performance.
This is aligned with the previous test on another platform with a smaller number of
CPUs: eevdf benefits schbench overall.

---------------------------------------------------------------------------------------



hackbench: ipc=pipe mode=process default fd:20

worker_threads        baseline            eevdf+NO_PLACE_BONUS
1                       103103    -0.3%      102794             hackbench.throughput_avg
25%                     115562  +825.7%     1069725             hackbench.throughput_avg
50%                     296514  +352.1%     1340414             hackbench.throughput_avg
75%                     498059  +190.8%     1448156             hackbench.throughput_avg
100%                    804560   +74.8%     1406413             hackbench.throughput_avg


worker_threads    eevdf+NO_PLACE_BONUS     eevdf+PLACE_BONUS
1                       102172    +1.5%      103661             hackbench.throughput_avg
25%                    1076503   -52.8%      508612             hackbench.throughput_avg
50%                    1394311   -68.2%      443251             hackbench.throughput_avg
75%                    1476502   -70.2%      440391             hackbench.throughput_avg
100%                   1512706   -76.2%      359741             hackbench.throughput_avg


Summary for hackbench pipe process test: in most cases eevdf+NO_PLACE_BONUS gives the best performance.

-------------------------------------------------------------------------------------
unixbench: test=pipe

nr_task               baseline            eevdf+NO_PLACE_BONUS
1                         1405    -0.5%        1398             unixbench.score
25%                      77942    +0.9%       78680             unixbench.score
50%                     155384    +1.1%      157100             unixbench.score
75%                     179756    +0.3%      180295             unixbench.score
100%                    204030    -0.2%      203540             unixbench.score
125%                    204972    -0.4%      204062             unixbench.score
150%                    205891    -0.5%      204792             unixbench.score
175%                    207051    -0.5%      206047             unixbench.score
200%                    209387    -0.9%      207559             unixbench.score


nr_task           eevdf+NO_PLACE_BONUS     eevdf+PLACE_BONUS
1                         1405    -0.3%        1401             unixbench.score
25%                      78640    +0.0%       78647             unixbench.score
50%                     157153    -0.0%      157093             unixbench.score
75%                     180152    +0.0%      180205             unixbench.score
100%                    203479    -0.0%      203464             unixbench.score
125%                    203866    +0.1%      204013             unixbench.score
150%                    204872    -0.0%      204838             unixbench.score
175%                    205799    +0.0%      205824             unixbench.score
200%                    207152    +0.2%      207546             unixbench.score

Seems to have no impact on unixbench in pipe mode.
--------------------------------------------------------------------------------

netperf: TCP_RR, ipv4, loopback

nr_threads            baseline            eevdf+NO_PLACE_BONUS
25%                      56232    -1.7%       55265             netperf.Throughput_tps
50%                      49876    -3.1%       48338             netperf.Throughput_tps
75%                      24281    +1.9%       24741             netperf.Throughput_tps
100%                     73598    +3.8%       76375             netperf.Throughput_tps
125%                     59119    +1.4%       59968             netperf.Throughput_tps
150%                     49124    +1.2%       49727             netperf.Throughput_tps
175%                     41929    +0.2%       42004             netperf.Throughput_tps
200%                     36543    +0.4%       36677             netperf.Throughput_tps

nr_threads        eevdf+NO_PLACE_BONUS     eevdf+PLACE_BONUS
25%                      55296    +4.7%       57877             netperf.Throughput_tps
50%                      48659    +1.9%       49585             netperf.Throughput_tps
75%                      24741    +0.3%       24807             netperf.Throughput_tps
100%                     76455    +6.7%       81548             netperf.Throughput_tps
125%                     60082    +7.6%       64622             netperf.Throughput_tps
150%                     49618    +7.7%       53429             netperf.Throughput_tps
175%                     41974    +7.6%       45160             netperf.Throughput_tps
200%                     36677    +6.5%       39067             netperf.Throughput_tps

eevdf itself seems to have little impact on netperf, though PLACE_BONUS shows a mild gain at higher loads.
-----------------------------------------------------------------------------------

stress-ng: futex

nr_threads            baseline            eevdf+NO_PLACE_BONUS
25%                     207926   -21.0%      164356             stress-ng.futex.ops_per_sec
50%                      46611   -16.1%       39130             stress-ng.futex.ops_per_sec
75%                      71381   -11.3%       63283             stress-ng.futex.ops_per_sec
100%                     58766    -0.8%       58269             stress-ng.futex.ops_per_sec
125%                     59859   +11.3%       66645             stress-ng.futex.ops_per_sec
150%                     52869    +7.6%       56863             stress-ng.futex.ops_per_sec
175%                     49607   +22.9%       60969             stress-ng.futex.ops_per_sec
200%                     56011   +11.8%       62631             stress-ng.futex.ops_per_sec


When the system is not busy, there is a regression. When the system gets busier,
there is some improvement. Even with PLACE_BONUS enabled, there is still a regression.
Per the perf profile of the 50% case, the ratio of wakeups is nearly the same with vs
without the eevdf patch applied:
      50.82    -0.7       50.15             perf-profile.children.cycles-pp.futex_wake
but there is more preemption with eevdf enabled:
     135095   +15.4%     155943             stress-ng.time.involuntary_context_switches
which is close to the -16.1% performance loss.
That is to say, eevdf helps the futex wakee grab the CPU more easily (benefiting
latency), while possibly having some impact on throughput?

thanks,
Chenyu

2023-04-13 16:08:39

by Chen Yu

Subject: Re: [PATCH 06/17] sched/fair: Add lag based placement

On 2023-04-13 at 23:42:34 +0800, Chen Yu wrote:
> On 2023-04-05 at 11:47:20 +0200, Peter Zijlstra wrote:
> > On Mon, Apr 03, 2023 at 05:18:06PM +0800, Chen Yu wrote:
> > > On 2023-03-28 at 11:26:28 +0200, Peter Zijlstra wrote:
> So I launched the tests on another platform with more CPUs:
>
> baseline: 6.3-rc6
>
> compare: sched/eevdf branch on top of commit 8c59a975d5ee ("sched/eevdf: Debug / validation crud")
> Chenyu
I realized that you pushed some changes to the eevdf branch yesterday, so the tests
actually ran on top of this commit, which I pulled a week ago:
commit 4f58ee3ba245ff97a075b17b454256f9c4d769c4 ("sched/eevdf: Debug / validation crud")

thanks,
Chenyu