2022-08-29 06:23:53

by Dietmar Eggemann

Subject: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

From: Vincent Donnefort <[email protected]>

The new sysctl sched_pelt_multiplier allows a user to set a clock
multiplier to x2 or x4 (x1 being the default). This clock multiplier
artificially speeds up PELT ramp up/down, similar to using a faster
half-life than the default 32ms.

- x1: 32ms half-life
- x2: 16ms half-life
- x4: 8ms half-life

Internally, a new clock is created: rq->clock_task_mult. It sits in the
clock hierarchy between rq->clock_task and rq->clock_pelt.

Signed-off-by: Vincent Donnefort <[email protected]>
Signed-off-by: Dietmar Eggemann <[email protected]>
---
kernel/sched/core.c | 2 +-
kernel/sched/pelt.c | 60 ++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/pelt.h | 42 ++++++++++++++++++++++++++++---
kernel/sched/sched.h | 1 +
4 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 603a80ec9b0e..6203cead4ad3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -727,7 +727,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
update_irq_load_avg(rq, irq_delta + steal);
#endif
- update_rq_clock_pelt(rq, delta);
+ update_rq_clock_task_mult(rq, delta);
}

void update_rq_clock(struct rq *rq)
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 9adfc4744544..9fa853a64269 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -472,3 +472,63 @@ int update_irq_load_avg(struct rq *rq, u64 running)
return ret;
}
#endif
+
+__read_mostly unsigned int sched_pelt_lshift;
+
+#ifdef CONFIG_SYSCTL
+static unsigned int sysctl_sched_pelt_multiplier = 1;
+
+int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ static DEFINE_MUTEX(mutex);
+ unsigned int old;
+ int ret;
+
+ mutex_lock(&mutex);
+ old = sysctl_sched_pelt_multiplier;
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret)
+ goto undo;
+ if (!write)
+ goto done;
+
+ switch (sysctl_sched_pelt_multiplier) {
+ case 1:
+ fallthrough;
+ case 2:
+ fallthrough;
+ case 4:
+ WRITE_ONCE(sched_pelt_lshift,
+ sysctl_sched_pelt_multiplier >> 1);
+ goto done;
+ default:
+ ret = -EINVAL;
+ }
+
+undo:
+ sysctl_sched_pelt_multiplier = old;
+done:
+ mutex_unlock(&mutex);
+
+ return ret;
+}
+
+static struct ctl_table sched_pelt_sysctls[] = {
+ {
+ .procname = "sched_pelt_multiplier",
+ .data = &sysctl_sched_pelt_multiplier,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_pelt_multiplier,
+ },
+ {}
+};
+
+static int __init sched_pelt_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sched_pelt_sysctls);
+ return 0;
+}
+late_initcall(sched_pelt_sysctl_init);
+#endif
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 3a0e0dc28721..9b35b5072bae 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -61,6 +61,14 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
WRITE_ONCE(avg->util_est.enqueued, enqueued);
}

+static inline u64 rq_clock_task_mult(struct rq *rq)
+{
+ lockdep_assert_rq_held(rq);
+ assert_clock_updated(rq);
+
+ return rq->clock_task_mult;
+}
+
static inline u64 rq_clock_pelt(struct rq *rq)
{
lockdep_assert_rq_held(rq);
@@ -72,7 +80,7 @@ static inline u64 rq_clock_pelt(struct rq *rq)
/* The rq is idle, we can sync to clock_task */
static inline void _update_idle_rq_clock_pelt(struct rq *rq)
{
- rq->clock_pelt = rq_clock_task(rq);
+ rq->clock_pelt = rq_clock_task_mult(rq);

u64_u32_store(rq->clock_idle, rq_clock(rq));
/* Paired with smp_rmb in migrate_se_pelt_lag() */
@@ -121,6 +129,27 @@ static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
rq->clock_pelt += delta;
}

+extern unsigned int sched_pelt_lshift;
+
+/*
+ * absolute time |1 |2 |3 |4 |5 |6 |
+ * @ mult = 1 --------****************--------****************-
+ * @ mult = 2 --------********----------------********---------
+ * @ mult = 4 --------****--------------------****-------------
+ * clock task mult
+ * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
+ * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
+ *
+ */
+static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
+{
+ delta <<= READ_ONCE(sched_pelt_lshift);
+
+ rq->clock_task_mult += delta;
+
+ update_rq_clock_pelt(rq, delta);
+}
+
/*
* When rq becomes idle, we have to check if it has lost idle time
* because it was fully busy. A rq is fully used when the /Sum util_sum
@@ -147,7 +176,7 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq)
* rq's clock_task.
*/
if (util_sum >= divider)
- rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
+ rq->lost_idle_time += rq_clock_task_mult(rq) - rq->clock_pelt;

_update_idle_rq_clock_pelt(rq);
}
@@ -218,13 +247,18 @@ update_irq_load_avg(struct rq *rq, u64 running)
return 0;
}

-static inline u64 rq_clock_pelt(struct rq *rq)
+static inline u64 rq_clock_task_mult(struct rq *rq)
{
return rq_clock_task(rq);
}

+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+ return rq_clock_task_mult(rq);
+}
+
static inline void
-update_rq_clock_pelt(struct rq *rq, s64 delta) { }
+update_rq_clock_task_mult(struct rq *rq, s64 delta) { }

static inline void
update_idle_rq_clock_pelt(struct rq *rq) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index da17be6f27fd..62fc3cf10ea7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1007,6 +1007,7 @@ struct rq {
u64 clock;
/* Ensure that all clocks are in the same cache line */
u64 clock_task ____cacheline_aligned;
+ u64 clock_task_mult;
u64 clock_pelt;
unsigned long lost_idle_time;
u64 clock_pelt_idle;
--
2.25.1
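
As background for the replies below: PELT decays the load/util sum by a
factor y per 1024us period, with y chosen so that y^32 = 0.5, i.e. the
default 32ms half-life. Feeding the accumulator a clock that runs k times
faster makes 32 wall-clock ms look like 32*k periods, which is all the
multiplier does. A standalone sketch of the arithmetic (illustration only,
not part of the patch; plain userspace C):

#include <stdio.h>
#include <math.h>

int main(void)
{
        double y = pow(0.5, 1.0 / 32.0); /* per-period decay: y^32 = 0.5 */
        int mults[] = { 1, 2, 4 };

        for (int i = 0; i < 3; i++) {
                int k = mults[i];
                /* 32 wall-clock ms = 32 periods at x1, 32*k periods at xk */
                double left = pow(y, 32.0 * k);
                printf("mult x%d: %4.1f%% of the signal left after 32ms, half-life ~%2dms\n",
                       k, left * 100.0, 32 / k);
        }
        return 0;
}

With the patch applied, the knob itself would be set with e.g.
"sysctl kernel.sched_pelt_multiplier=2".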


2022-08-29 08:13:25

by Peter Zijlstra

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
> From: Vincent Donnefort <[email protected]>
>
> The new sysctl sched_pelt_multiplier allows a user to set a clock
> multiplier to x2 or x4 (x1 being the default). This clock multiplier
> artificially speeds up PELT ramp up/down, similar to using a faster
> half-life than the default 32ms.
>
> - x1: 32ms half-life
> - x2: 16ms half-life
> - x4: 8ms half-life
>
> Internally, a new clock is created: rq->clock_task_mult. It sits in the
> clock hierarchy between rq->clock_task and rq->clock_pelt.
>
> Signed-off-by: Vincent Donnefort <[email protected]>
> Signed-off-by: Dietmar Eggemann <[email protected]>

> +extern unsigned int sched_pelt_lshift;
> +
> +/*
> + * absolute time |1 |2 |3 |4 |5 |6 |
> + * @ mult = 1 --------****************--------****************-
> + * @ mult = 2 --------********----------------********---------
> + * @ mult = 4 --------****--------------------****-------------
> + * clock task mult
> + * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
> + * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
> + *
> + */
> +static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
> +{
> + delta <<= READ_ONCE(sched_pelt_lshift);
> +
> + rq->clock_task_mult += delta;
> +
> + update_rq_clock_pelt(rq, delta);
> +}

Hurmph... I'd almost go write you something like
static_call()/static_branch() but for immediates.

That said; given there's only like 3 options, perhaps a few
static_branch() instances work just fine ?
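
A rough sketch of that static_branch() idea (illustration only, nothing
like this was actually posted; the key names are invented):

static DEFINE_STATIC_KEY_FALSE(sched_pelt_m2);
static DEFINE_STATIC_KEY_FALSE(sched_pelt_m4);

static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
{
        /* each test is patched to a no-op until its key is enabled */
        if (static_branch_unlikely(&sched_pelt_m2))
                delta *= 2;
        if (static_branch_unlikely(&sched_pelt_m4))
                delta *= 4;

        rq->clock_task_mult += delta;

        update_rq_clock_pelt(rq, delta);
}

The sysctl handler would flip the keys with static_branch_enable() /
static_branch_disable() instead of writing sched_pelt_lshift, so the hot
path pays neither the READ_ONCE() nor the shift.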

2022-08-29 10:43:26

by Peter Zijlstra

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
> On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
> > From: Vincent Donnefort <[email protected]>
> >
> > The new sysctl sched_pelt_multiplier allows a user to set a clock
> > multiplier to x2 or x4 (x1 being the default). This clock multiplier
> > artificially speeds up PELT ramp up/down, similar to using a faster
> > half-life than the default 32ms.
> >
> > - x1: 32ms half-life
> > - x2: 16ms half-life
> > - x4: 8ms half-life
> >
> > Internally, a new clock is created: rq->clock_task_mult. It sits in the
> > clock hierarchy between rq->clock_task and rq->clock_pelt.
> >
> > Signed-off-by: Vincent Donnefort <[email protected]>
> > Signed-off-by: Dietmar Eggemann <[email protected]>
>
> > +extern unsigned int sched_pelt_lshift;
> > +
> > +/*
> > + * absolute time |1 |2 |3 |4 |5 |6 |
> > + * @ mult = 1 --------****************--------****************-
> > + * @ mult = 2 --------********----------------********---------
> > + * @ mult = 4 --------****--------------------****-------------
> > + * clock task mult
> > + * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
> > + * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
> > + *
> > + */
> > +static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
> > +{
> > + delta <<= READ_ONCE(sched_pelt_lshift);
> > +
> > + rq->clock_task_mult += delta;
> > +
> > + update_rq_clock_pelt(rq, delta);
> > +}
>
> Hurmph... I'd almost go write you something like
> static_call()/static_branch() but for immediates.
>
> That said; given there's only like 3 options, perhaps a few
> static_branch() instances work just fine ?

Also, I'm not at all sure about exposing that as an official sysctl.

How about something like so?

---
Subject: sched/pelt: Introduce PELT multiplier
From: Vincent Donnefort <[email protected]>
Date: Mon, 29 Aug 2022 07:54:50 +0200

From: Vincent Donnefort <[email protected]>

The new sysctl sched_pelt_multiplier allows a user to set a clock
multiplier to x2 or x4 (x1 being the default). This clock multiplier
artificially speeds up PELT ramp up/down, similar to using a faster
half-life than the default 32ms.

- x1: 32ms half-life
- x2: 16ms half-life
- x4: 8ms half-life

Internally, a new clock is created: rq->clock_task_mult. It sits in the
clock hierarchy between rq->clock_task and rq->clock_pelt.

[peterz: Use sched_feat()]
Signed-off-by: Vincent Donnefort <[email protected]>
Signed-off-by: Dietmar Eggemann <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
kernel/sched/core.c | 2 +-
kernel/sched/features.h | 3 +++
kernel/sched/pelt.h | 45 +++++++++++++++++++++++++++++++++++++++++----
kernel/sched/sched.h | 1 +
4 files changed, 46 insertions(+), 5 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -727,7 +727,7 @@ static void update_rq_clock_task(struct
if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
update_irq_load_avg(rq, irq_delta + steal);
#endif
- update_rq_clock_pelt(rq, delta);
+ update_rq_clock_task_mult(rq, delta);
}

void update_rq_clock(struct rq *rq)
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -101,3 +101,6 @@ SCHED_FEAT(LATENCY_WARN, false)

SCHED_FEAT(ALT_PERIOD, true)
SCHED_FEAT(BASE_SLICE, true)
+
+SCHED_FEAT(PELT_M2, false)
+SCHED_FEAT(PELT_M4, false)
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -61,6 +61,14 @@ static inline void cfs_se_util_change(st
WRITE_ONCE(avg->util_est.enqueued, enqueued);
}

+static inline u64 rq_clock_task_mult(struct rq *rq)
+{
+ lockdep_assert_rq_held(rq);
+ assert_clock_updated(rq);
+
+ return rq->clock_task_mult;
+}
+
static inline u64 rq_clock_pelt(struct rq *rq)
{
lockdep_assert_rq_held(rq);
@@ -72,7 +80,7 @@ static inline u64 rq_clock_pelt(struct r
/* The rq is idle, we can sync to clock_task */
static inline void _update_idle_rq_clock_pelt(struct rq *rq)
{
- rq->clock_pelt = rq_clock_task(rq);
+ rq->clock_pelt = rq_clock_task_mult(rq);

u64_u32_store(rq->clock_idle, rq_clock(rq));
/* Paired with smp_rmb in migrate_se_pelt_lag() */
@@ -121,6 +129,30 @@ static inline void update_rq_clock_pelt(
rq->clock_pelt += delta;
}

+extern unsigned int sched_pelt_lshift;
+
+/*
+ * absolute time |1 |2 |3 |4 |5 |6 |
+ * @ mult = 1 --------****************--------****************-
+ * @ mult = 2 --------********----------------********---------
+ * @ mult = 4 --------****--------------------****-------------
+ * clock task mult
+ * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
+ * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
+ *
+ */
+static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
+{
+ if (sched_feat(PELT_M2))
+ delta *= 2;
+ if (sched_feat(PELT_M4))
+ delta *= 4;
+
+ rq->clock_task_mult += delta;
+
+ update_rq_clock_pelt(rq, delta);
+}
+
/*
* When rq becomes idle, we have to check if it has lost idle time
* because it was fully busy. A rq is fully used when the /Sum util_sum
@@ -147,7 +179,7 @@ static inline void update_idle_rq_clock_
* rq's clock_task.
*/
if (util_sum >= divider)
- rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
+ rq->lost_idle_time += rq_clock_task_mult(rq) - rq->clock_pelt;

_update_idle_rq_clock_pelt(rq);
}
@@ -218,13 +250,18 @@ update_irq_load_avg(struct rq *rq, u64 r
return 0;
}

-static inline u64 rq_clock_pelt(struct rq *rq)
+static inline u64 rq_clock_task_mult(struct rq *rq)
{
return rq_clock_task(rq);
}

+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+ return rq_clock_task_mult(rq);
+}
+
static inline void
-update_rq_clock_pelt(struct rq *rq, s64 delta) { }
+update_rq_clock_task_mult(struct rq *rq, s64 delta) { }

static inline void
update_idle_rq_clock_pelt(struct rq *rq) { }
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1009,6 +1009,7 @@ struct rq {
u64 clock;
/* Ensure that all clocks are in the same cache line */
u64 clock_task ____cacheline_aligned;
+ u64 clock_task_mult;
u64 clock_pelt;
unsigned long lost_idle_time;
u64 clock_pelt_idle;
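
With this variant the multiplier is no longer a sysctl: assuming debugfs
is mounted in the usual place, it would be toggled at run time with
"echo PELT_M2 > /sys/kernel/debug/sched/features" (and NO_PELT_M2 to
clear it again). With CONFIG_JUMP_LABEL=y, sched_feat() compiles down to
a static key, so this is effectively the static_branch() idea from the
earlier reply.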

2022-08-29 11:01:23

by Vincent Guittot

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Mon, 29 Aug 2022 at 12:03, Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
> > > From: Vincent Donnefort <[email protected]>
> > >
> > > The new sysctl sched_pelt_multiplier allows a user to set a clock
> > > multiplier to x2 or x4 (x1 being the default). This clock multiplier
> > > artificially speeds up PELT ramp up/down, similar to using a faster
> > > half-life than the default 32ms.
> > >
> > > - x1: 32ms half-life
> > > - x2: 16ms half-life
> > > - x4: 8ms half-life
> > >
> > > Internally, a new clock is created: rq->clock_task_mult. It sits in the
> > > clock hierarchy between rq->clock_task and rq->clock_pelt.
> > >
> > > Signed-off-by: Vincent Donnefort <[email protected]>
> > > Signed-off-by: Dietmar Eggemann <[email protected]>
> >
> > > +extern unsigned int sched_pelt_lshift;
> > > +
> > > +/*
> > > + * absolute time |1 |2 |3 |4 |5 |6 |
> > > + * @ mult = 1 --------****************--------****************-
> > > + * @ mult = 2 --------********----------------********---------
> > > + * @ mult = 4 --------****--------------------****-------------
> > > + * clock task mult
> > > + * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
> > > + * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
> > > + *
> > > + */
> > > +static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
> > > +{
> > > + delta <<= READ_ONCE(sched_pelt_lshift);
> > > +
> > > + rq->clock_task_mult += delta;
> > > +
> > > + update_rq_clock_pelt(rq, delta);
> > > +}
> >
> > Hurmph... I'd almost go write you something like
> > static_call()/static_branch() but for immediates.
> >
> > That said; given there's only like 3 options, perhaps a few
> > static_branch() instances work just fine ?
>
> Also, I'm not at all sure about exposing that as an official sysctl.

Me neither; I would even make it a boot-time parameter so we can remove
the new clock_task_mult clock and left-shift clock_task or the delta
before passing it to clock_pelt.

>
> How about something like so?
>
> ---
> Subject: sched/pelt: Introduce PELT multiplier
> From: Vincent Donnefort <[email protected]>
> Date: Mon, 29 Aug 2022 07:54:50 +0200
>
> From: Vincent Donnefort <[email protected]>
>
> The new sysctl sched_pelt_multiplier allows a user to set a clock
> multiplier to x2 or x4 (x1 being the default). This clock multiplier
> artificially speeds up PELT ramp up/down, similar to using a faster
> half-life than the default 32ms.
>
> - x1: 32ms half-life
> - x2: 16ms half-life
> - x4: 8ms half-life
>
> Internally, a new clock is created: rq->clock_task_mult. It sits in the
> clock hierarchy between rq->clock_task and rq->clock_pelt.
>
> [peterz: Use sched_feat()]
> Signed-off-by: Vincent Donnefort <[email protected]>
> Signed-off-by: Dietmar Eggemann <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> kernel/sched/core.c | 2 +-
> kernel/sched/features.h | 3 +++
> kernel/sched/pelt.h | 45 +++++++++++++++++++++++++++++++++++++++++----
> kernel/sched/sched.h | 1 +
> 4 files changed, 46 insertions(+), 5 deletions(-)
>
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -727,7 +727,7 @@ static void update_rq_clock_task(struct
> if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
> update_irq_load_avg(rq, irq_delta + steal);
> #endif
> - update_rq_clock_pelt(rq, delta);
> + update_rq_clock_task_mult(rq, delta);
> }
>
> void update_rq_clock(struct rq *rq)
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -101,3 +101,6 @@ SCHED_FEAT(LATENCY_WARN, false)
>
> SCHED_FEAT(ALT_PERIOD, true)
> SCHED_FEAT(BASE_SLICE, true)
> +
> +SCHED_FEAT(PELT_M2, false)
> +SCHED_FEAT(PELT_M4, false)
> --- a/kernel/sched/pelt.h
> +++ b/kernel/sched/pelt.h
> @@ -61,6 +61,14 @@ static inline void cfs_se_util_change(st
> WRITE_ONCE(avg->util_est.enqueued, enqueued);
> }
>
> +static inline u64 rq_clock_task_mult(struct rq *rq)
> +{
> + lockdep_assert_rq_held(rq);
> + assert_clock_updated(rq);
> +
> + return rq->clock_task_mult;
> +}
> +
> static inline u64 rq_clock_pelt(struct rq *rq)
> {
> lockdep_assert_rq_held(rq);
> @@ -72,7 +80,7 @@ static inline u64 rq_clock_pelt(struct r
> /* The rq is idle, we can sync to clock_task */
> static inline void _update_idle_rq_clock_pelt(struct rq *rq)
> {
> - rq->clock_pelt = rq_clock_task(rq);
> + rq->clock_pelt = rq_clock_task_mult(rq);
>
> u64_u32_store(rq->clock_idle, rq_clock(rq));
> /* Paired with smp_rmb in migrate_se_pelt_lag() */
> @@ -121,6 +129,30 @@ static inline void update_rq_clock_pelt(
> rq->clock_pelt += delta;
> }
>
> +extern unsigned int sched_pelt_lshift;
> +
> +/*
> + * absolute time |1 |2 |3 |4 |5 |6 |
> + * @ mult = 1 --------****************--------****************-
> + * @ mult = 2 --------********----------------********---------
> + * @ mult = 4 --------****--------------------****-------------
> + * clock task mult
> + * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
> + * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
> + *
> + */
> +static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
> +{
> + if (sched_feat(PELT_M2))
> + delta *= 2;
> + if (sched_feat(PELT_M4))
> + delta *= 4;
> +
> + rq->clock_task_mult += delta;
> +
> + update_rq_clock_pelt(rq, delta);
> +}
> +
> /*
> * When rq becomes idle, we have to check if it has lost idle time
> * because it was fully busy. A rq is fully used when the /Sum util_sum
> @@ -147,7 +179,7 @@ static inline void update_idle_rq_clock_
> * rq's clock_task.
> */
> if (util_sum >= divider)
> - rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
> + rq->lost_idle_time += rq_clock_task_mult(rq) - rq->clock_pelt;
>
> _update_idle_rq_clock_pelt(rq);
> }
> @@ -218,13 +250,18 @@ update_irq_load_avg(struct rq *rq, u64 r
> return 0;
> }
>
> -static inline u64 rq_clock_pelt(struct rq *rq)
> +static inline u64 rq_clock_task_mult(struct rq *rq)
> {
> return rq_clock_task(rq);
> }
>
> +static inline u64 rq_clock_pelt(struct rq *rq)
> +{
> + return rq_clock_task_mult(rq);
> +}
> +
> static inline void
> -update_rq_clock_pelt(struct rq *rq, s64 delta) { }
> +update_rq_clock_task_mult(struct rq *rq, s64 delta) { }
>
> static inline void
> update_idle_rq_clock_pelt(struct rq *rq) { }
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1009,6 +1009,7 @@ struct rq {
> u64 clock;
> /* Ensure that all clocks are in the same cache line */
> u64 clock_task ____cacheline_aligned;
> + u64 clock_task_mult;
> u64 clock_pelt;
> unsigned long lost_idle_time;
> u64 clock_pelt_idle;
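
A minimal sketch of that boot-time variant (illustration only; the
parameter name and the encoding of the shift are assumptions, nothing
like this was posted in the thread):

static unsigned int sched_pelt_lshift __ro_after_init;

static int __init sched_pelt_multiplier_setup(char *str)
{
        unsigned int mult;

        if (!str || kstrtouint(str, 0, &mult))
                return -EINVAL;

        if (mult != 1 && mult != 2 && mult != 4)
                return -EINVAL;

        sched_pelt_lshift = mult >> 1; /* 1 -> 0, 2 -> 1, 4 -> 2 */
        return 0;
}
early_param("sched_pelt_multiplier", sched_pelt_multiplier_setup);

Since the shift can then never change after boot, it can be applied
wherever clock_task is read, which is what would make the extra
rq->clock_task_mult clock removable.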

2022-08-29 15:13:26

by Quentin Perret

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Monday 29 Aug 2022 at 12:13:26 (+0200), Vincent Guittot wrote:
> On Mon, 29 Aug 2022 at 12:03, Peter Zijlstra <[email protected]> wrote:
> >
> > On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
> > > On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
> > > > From: Vincent Donnefort <[email protected]>
> > > >
> > > > The new sysctl sched_pelt_multiplier allows a user to set a clock
> > > > multiplier to x2 or x4 (x1 being the default). This clock multiplier
> > > > artificially speeds up PELT ramp up/down, similar to using a faster
> > > > half-life than the default 32ms.
> > > >
> > > > - x1: 32ms half-life
> > > > - x2: 16ms half-life
> > > > - x4: 8ms half-life
> > > >
> > > > Internally, a new clock is created: rq->clock_task_mult. It sits in the
> > > > clock hierarchy between rq->clock_task and rq->clock_pelt.
> > > >
> > > > Signed-off-by: Vincent Donnefort <[email protected]>
> > > > Signed-off-by: Dietmar Eggemann <[email protected]>
> > >
> > > > +extern unsigned int sched_pelt_lshift;
> > > > +
> > > > +/*
> > > > + * absolute time |1 |2 |3 |4 |5 |6 |
> > > > + * @ mult = 1 --------****************--------****************-
> > > > + * @ mult = 2 --------********----------------********---------
> > > > + * @ mult = 4 --------****--------------------****-------------
> > > > + * clock task mult
> > > > + * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
> > > > + * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
> > > > + *
> > > > + */
> > > > +static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
> > > > +{
> > > > + delta <<= READ_ONCE(sched_pelt_lshift);
> > > > +
> > > > + rq->clock_task_mult += delta;
> > > > +
> > > > + update_rq_clock_pelt(rq, delta);
> > > > +}
> > >
> > > Hurmph... I'd almost go write you something like
> > > static_call()/static_branch() but for immediates.
> > >
> > > That said; given there's only like 3 options, perhaps a few
> > > static_branch() instances work just fine ?
> >
> > Also, I'm not at all sure about exposing that as an official sysctl.
>
> Me neither; I would even make it a boot-time parameter so we can remove
> the new clock_task_mult clock and left-shift clock_task or the delta
> before passing it to clock_pelt.

I'll let folks in CC comment about their use-case in more detail, but
there's definitely been an interest in tuning this thing at run-time
FWIW. Typically a larger half-life will be fine with predictable
workloads with little input from users (e.g. fullscreen video playback)
while a lower one can be preferred in highly interactive cases (games,
...). The transient state is fun to reason about, but it really
shouldn't be too common AFAIK.

With that said I'd quite like to see numbers to back that claim.
Measuring power while running a video (for instance) with various HL
values should help. And similarly it shouldn't be too hard to get
performance numbers.

2022-08-29 15:15:56

by Peter Zijlstra

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Mon, Aug 29, 2022 at 02:23:17PM +0000, Quentin Perret wrote:

> I'll let folks in CC comment about their use-case in more detail, but
> there's definitely been an interest in tuning this thing at run-time

An interest and it making sense are two very distinct things that bear
no relation to one another in any way.

> FWIW. Typically a larger half-life will be fine with predictable
> workloads with little input from users (e.g. fullscreen video playback)
> while a lower one can be preferred in highly interactive cases (games,

As per always; consider the combined workload.

> ...). The transient state is fun to reason about, but it really
> shouldn't be too common AFAIK.

Once you give away control there is no taking it back, and userspace
*will* do stupid things and expect unicorns.

> With that said I'd quite like to see numbers to back that claim.
> Measuring power while running a video (for instance) with various HL
> values should help. And similarly it shouldn't be too hard to get
> performance numbers.

I'm thinking this all needs far more than mere numbers to justify.

2022-08-29 15:51:15

by Quentin Perret

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Monday 29 Aug 2022 at 16:34:39 (+0200), Peter Zijlstra wrote:
> On Mon, Aug 29, 2022 at 02:23:17PM +0000, Quentin Perret wrote:
>
> > I'll let folks in CC comment about their use-case in more detail, but
> > there's definitely been an interest in tuning this thing at run-time
>
> An interest and it making sense are two very distinct things that bear
> no relation to one another in any way.

And just to clarify something here, as my first message was misleading:
the run-time tuning of PELT HLs has been used in some Android products
for a little while already, so it's more than a mere 'interest'.

That doesn't close the debate on whether this makes any sense, but I
figured this should be mentioned :-)

2022-08-29 16:13:15

by Quentin Perret

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Monday 29 Aug 2022 at 16:34:39 (+0200), Peter Zijlstra wrote:
> On Mon, Aug 29, 2022 at 02:23:17PM +0000, Quentin Perret wrote:
> > FWIW. Typically a larger half-life will be fine with predictable
> > workloads with little input from users (e.g. fullscreen video playback)
> > while a lower one can be preferred in highly interactive cases (games,
>
> As per always; consider the combined workload.

That's the problem of whoever is going to decide which HL should be
used, which would all fall in userspace policy land. And yes that
userspace agent would have to know quite a bit about what is going on in
the system to make 'sane' decisions, but that is the case in Android.

The fact that choosing the right HL for a combined workload is hard
(that userspace agent might want to just default to a safe value?)
doesn't mean there is no value in other cases though...

I mean, if we agree that tuning the PELT HL makes sense at all (even as
a cmdline arg, or sched_feat or whatever), there is only one small step
to say the right PELT HL should indeed depend on the workload you're
currently running (especially with _vastly_ different usage patterns
seen in mobile).

> > ...). The transient state is fun to reason about, but it really
> > shouldn't be too common AFAIK.
>
> Once you give away control there is no taking it back, and userspace
> *will* do stupid things and expect unicorns.

Sure. If you do stupid, at some point you get(/deserve) what you asked
for... We could perhaps rate-limit changes or something if we want to
make that clear?
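
A sketch of that rate-limiting idea, bolted onto the handler from the
original patch (illustration only; the one-second window is an arbitrary
choice, nobody proposed a concrete value):

int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer,
                          size_t *lenp, loff_t *ppos)
{
        static DEFINE_MUTEX(mutex);
        static unsigned long last_write; /* jiffies of last accepted write */
        unsigned int old;
        int ret;

        mutex_lock(&mutex);

        /* hypothetical rate limit: at most one change per second */
        if (write && last_write && time_before(jiffies, last_write + HZ)) {
                mutex_unlock(&mutex);
                return -EBUSY;
        }

        old = sysctl_sched_pelt_multiplier;
        ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (ret)
                goto undo;
        if (!write)
                goto done;

        switch (sysctl_sched_pelt_multiplier) {
        case 1:
        case 2:
        case 4:
                WRITE_ONCE(sched_pelt_lshift,
                           sysctl_sched_pelt_multiplier >> 1);
                last_write = jiffies;
                goto done;
        default:
                ret = -EINVAL;
        }

undo:
        sysctl_sched_pelt_multiplier = old;
done:
        mutex_unlock(&mutex);

        return ret;
}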

2022-09-02 08:04:51

by Dietmar Eggemann

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On 29/08/2022 12:13, Vincent Guittot wrote:
> On Mon, 29 Aug 2022 at 12:03, Peter Zijlstra <[email protected]> wrote:
>>
>> On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
>>> On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
>>>> From: Vincent Donnefort <[email protected]>

[...]

>>> Hurmph... I'd almost go write you something like
>>> static_call()/static_branch() but for immediates.
>>>
>>> That said; given there's only like 3 options, perhaps a few
>>> static_branch() instances work just fine ?
>>
>> Also, I'm not at all sure about exposing that as an official sysctl.
>
> Me neither; I would even make it a boot-time parameter so we can remove

Isn't a sched feature even less official than a boot parameter?
But AFAIK at least some of the Android folks want to change this at
runtime, and they don't have debugfs mounted.

> the new clock_task_mult clock and left-shift clock_task or the delta
> before passing it to clock_pelt.

We still need rq_clock_task_mult(rq), i.e. `rq->clock_task_mult` in
_update_idle_rq_clock_pelt() though.

2022-09-02 08:11:31

by Dietmar Eggemann

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On 29/08/2022 12:02, Peter Zijlstra wrote:
> On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
>> On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
>>> From: Vincent Donnefort <[email protected]>

[...]

>>> +extern unsigned int sched_pelt_lshift;
>>> +
>>> +/*
>>> + * absolute time |1 |2 |3 |4 |5 |6 |
>>> + * @ mult = 1 --------****************--------****************-
>>> + * @ mult = 2 --------********----------------********---------
>>> + * @ mult = 4 --------****--------------------****-------------
>>> + * clock task mult
>>> + * @ mult = 2 | | |2 |3 | | | | |5 |6 | | |
>>> + * @ mult = 4 | | | | |2|3| | | | | | | | | | |5|6| | | | | | |
>>> + *
>>> + */
>>> +static inline void update_rq_clock_task_mult(struct rq *rq, s64 delta)
>>> +{
>>> + delta <<= READ_ONCE(sched_pelt_lshift);
>>> +
>>> + rq->clock_task_mult += delta;
>>> +
>>> + update_rq_clock_pelt(rq, delta);
>>> +}
>>
>> Hurmph... I'd almost go write you something like
>> static_call()/static_branch() but for immediates.
>>
>> That said; given there's only like 3 options, perhaps a few
>> static_branch() instances work just fine ?
>
> Also, I'm not at all sure about exposing that as an official sysctl.
>
> How about something like so?

[...]

> void update_rq_clock(struct rq *rq)
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -101,3 +101,6 @@ SCHED_FEAT(LATENCY_WARN, false)
>
> SCHED_FEAT(ALT_PERIOD, true)
> SCHED_FEAT(BASE_SLICE, true)
> +
> +SCHED_FEAT(PELT_M2, false)
> +SCHED_FEAT(PELT_M4, false)

The sched features interface would definitely be much less official, but
I think it's not useful for Android since they don't mount debugfs anymore.

[...]

2022-09-02 09:02:46

by Peter Zijlstra

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Fri, Sep 02, 2022 at 09:53:22AM +0200, Dietmar Eggemann wrote:
> > void update_rq_clock(struct rq *rq)
> > --- a/kernel/sched/features.h
> > +++ b/kernel/sched/features.h
> > @@ -101,3 +101,6 @@ SCHED_FEAT(LATENCY_WARN, false)
> >
> > SCHED_FEAT(ALT_PERIOD, true)
> > SCHED_FEAT(BASE_SLICE, true)
> > +
> > +SCHED_FEAT(PELT_M2, false)
> > +SCHED_FEAT(PELT_M4, false)
>
> The sched features interface would definitely be much less official, but
> I think it's not useful for Android since they don't mount debugfs anymore.

Well, they can haz a small patch that sets either one or both to true,
no?
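
I.e., something like this one-liner (sketch only; hunk line numbers
approximate):

--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -103,4 +103,4 @@
SCHED_FEAT(BASE_SLICE, true)

-SCHED_FEAT(PELT_M2, false)
+SCHED_FEAT(PELT_M2, true)
SCHED_FEAT(PELT_M4, false)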

2022-09-02 09:30:24

by Peter Zijlstra

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Fri, Sep 02, 2022 at 09:53:50AM +0200, Dietmar Eggemann wrote:

> But AFAIK at least some of the Android folks want to change this at
> runtime, and they don't have debugfs mounted.

They want; I want an explanation of what exact problem is fixed how ;-)

2022-09-06 06:01:54

by Vincent Guittot

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On Fri, 2 Sept 2022 at 09:54, Dietmar Eggemann <[email protected]> wrote:
>
> On 29/08/2022 12:13, Vincent Guittot wrote:
> > On Mon, 29 Aug 2022 at 12:03, Peter Zijlstra <[email protected]> wrote:
> >>
> >> On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
> >>> On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
> >>>> From: Vincent Donnefort <[email protected]>
>
> [...]
>
> >>> Hurmph... I'd almost go write you something like
> >>> static_call()/static_branch() but for immediates.
> >>>
> >>> That said; given there's only like 3 options, perhaps a few
> >>> static_branch() instances work just fine ?
> >>
> >> Also, I'm not at all sure about exposing that as an official sysctl.
> >
> > Me neither; I would even make it a boot-time parameter so we can remove
>
> Isn't a sched feature even less official than a boot parameter?
> > But AFAIK at least some of the Android folks want to change this at
> > runtime, and they don't have debugfs mounted.
>
> > the new clock_task_mult clock and left-shift clock_task or the delta
> > before passing it to clock_pelt.
>
> We still need rq_clock_task_mult(rq), i.e. `rq->clock_task_mult` in
> _update_idle_rq_clock_pelt() though.

Why ? If the mult is defined at boot we just have to use
"rq_clock_task(rq) << mult" instead of rq_clock_task(rq) when updating
clock_pelt

2022-09-08 07:38:26

by Dietmar Eggemann

Subject: Re: [RFC PATCH 1/1] sched/pelt: Introduce PELT multiplier

On 06/09/2022 07:49, Vincent Guittot wrote:
> On Fri, 2 Sept 2022 at 09:54, Dietmar Eggemann <[email protected]> wrote:
>>
>> On 29/08/2022 12:13, Vincent Guittot wrote:
>>> On Mon, 29 Aug 2022 at 12:03, Peter Zijlstra <[email protected]> wrote:
>>>>
>>>> On Mon, Aug 29, 2022 at 10:08:13AM +0200, Peter Zijlstra wrote:
>>>>> On Mon, Aug 29, 2022 at 07:54:50AM +0200, Dietmar Eggemann wrote:
>>>>>> From: Vincent Donnefort <[email protected]>

[...]

>> We still need rq_clock_task_mult(rq), i.e. `rq->clock_task_mult` in
>> _update_idle_rq_clock_pelt() though.
>
> Why ? If the mult is defined at boot we just have to use
> "rq_clock_task(rq) << mult" instead of rq_clock_task(rq) when updating
> clock_pelt

Makes sense! Agreed.
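
For completeness, the agreed-upon shape (a sketch under the boot-time
shift assumption from the exchange above, not a posted patch):

static inline void _update_idle_rq_clock_pelt(struct rq *rq)
{
        /* Sync to the shifted task clock; no intermediate clock_task_mult. */
        rq->clock_pelt = rq_clock_task(rq) << sched_pelt_lshift;

        u64_u32_store(rq->clock_idle, rq_clock(rq));
        /* Paired with smp_rmb in migrate_se_pelt_lag() */
        smp_wmb();
        u64_u32_store(rq->clock_pelt_idle, rq_clock_pelt(rq));
}

This works because clock_task_mult is just clock_task with every delta
left-shifted: with the shift fixed from boot, the two clocks differ only
by that shift, so shifting clock_task on the spot recovers the same
value, and the lost_idle_time bookkeeping can compute against the same
shifted clock.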