Thermal pressure follows pelt signas which means the
decay period for thermal pressure is the default pelt
decay period. Depending on soc charecteristics and thermal
activity, it might be beneficial to decay thermal pressure
slower, but still in-tune with the pelt signals.
One way to achieve this is to provide a command line parameter
to set a decay shift parameter to an integer between 0 and 10.
Signed-off-by: Thara Gopinath <[email protected]>
---
v4->v5:
- Changed _coeff to _shift as per review comments on the list.
Documentation/admin-guide/kernel-parameters.txt | 5 +++++
kernel/sched/fair.c | 25 +++++++++++++++++++++++--
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c82f87c..0b8f55e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4281,6 +4281,11 @@
incurs a small amount of overhead in the scheduler
but is useful for debugging and performance tuning.
+ sched_thermal_decay_shift=
+ [KNL, SMP] Set decay shift for thermal pressure signal.
+ Format: integer betweer 0 and 10
+ Default is 0.
+
skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
xtime_lock contention on larger systems, and/or RCU lock
contention on all systems with CONFIG_MAXSMP set.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5f6c371..61a020b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -91,6 +91,18 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
* and maximum available capacity due to thermal events.
*/
static DEFINE_PER_CPU(unsigned long, thermal_pressure);
+/**
+ * By default the decay is the default pelt decay period.
+ * The decay shift can change the decay period in
+ * multiples of 32.
+ * Decay shift Decay period(ms)
+ * 0 32
+ * 1 64
+ * 2 128
+ * 3 256
+ * 4 512
+ */
+static int sched_thermal_decay_shift;
static void trigger_thermal_pressure_average(struct rq *rq);
@@ -10435,6 +10447,15 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
delta = arch_scale_cpu_capacity(cpu) - capped_capacity;
per_cpu(thermal_pressure, cpu) = delta;
}
+
+static int __init setup_sched_thermal_decay_shift(char *str)
+{
+ if (kstrtoint(str, 0, &sched_thermal_decay_shift))
+ pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n");
+
+ return 1;
+}
+__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
#endif
/**
@@ -10444,8 +10465,8 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
static void trigger_thermal_pressure_average(struct rq *rq)
{
#ifdef CONFIG_SMP
- update_thermal_load_avg(rq_clock_task(rq), rq,
- per_cpu(thermal_pressure, cpu_of(rq)));
+ update_thermal_load_avg(rq_clock_task(rq) >> sched_thermal_decay_shift,
+ rq, per_cpu(thermal_pressure, cpu_of(rq)));
#endif
}
--
2.1.4
Le Tuesday 05 Nov 2019 ? 13:49:46 (-0500), Thara Gopinath a ?crit :
> Thermal pressure follows pelt signas which means the
> decay period for thermal pressure is the default pelt
> decay period. Depending on soc charecteristics and thermal
> activity, it might be beneficial to decay thermal pressure
> slower, but still in-tune with the pelt signals.
> One way to achieve this is to provide a command line parameter
> to set a decay shift parameter to an integer between 0 and 10.
>
> Signed-off-by: Thara Gopinath <[email protected]>
> ---
>
> v4->v5:
> - Changed _coeff to _shift as per review comments on the list.
>
> Documentation/admin-guide/kernel-parameters.txt | 5 +++++
> kernel/sched/fair.c | 25 +++++++++++++++++++++++--
> 2 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index c82f87c..0b8f55e 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -4281,6 +4281,11 @@
> incurs a small amount of overhead in the scheduler
> but is useful for debugging and performance tuning.
>
> + sched_thermal_decay_shift=
> + [KNL, SMP] Set decay shift for thermal pressure signal.
> + Format: integer betweer 0 and 10
> + Default is 0.
> +
> skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
> xtime_lock contention on larger systems, and/or RCU lock
> contention on all systems with CONFIG_MAXSMP set.
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5f6c371..61a020b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -91,6 +91,18 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
> * and maximum available capacity due to thermal events.
> */
> static DEFINE_PER_CPU(unsigned long, thermal_pressure);
> +/**
> + * By default the decay is the default pelt decay period.
> + * The decay shift can change the decay period in
> + * multiples of 32.
> + * Decay shift Decay period(ms)
> + * 0 32
> + * 1 64
> + * 2 128
> + * 3 256
> + * 4 512
> + */
> +static int sched_thermal_decay_shift;
>
> static void trigger_thermal_pressure_average(struct rq *rq);
>
> @@ -10435,6 +10447,15 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
> delta = arch_scale_cpu_capacity(cpu) - capped_capacity;
> per_cpu(thermal_pressure, cpu) = delta;
> }
> +
> +static int __init setup_sched_thermal_decay_shift(char *str)
> +{
> + if (kstrtoint(str, 0, &sched_thermal_decay_shift))
> + pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n");
> +
> + return 1;
> +}
> +__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
> #endif
>
> /**
> @@ -10444,8 +10465,8 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
> static void trigger_thermal_pressure_average(struct rq *rq)
> {
> #ifdef CONFIG_SMP
> - update_thermal_load_avg(rq_clock_task(rq), rq,
> - per_cpu(thermal_pressure, cpu_of(rq)));
> + update_thermal_load_avg(rq_clock_task(rq) >> sched_thermal_decay_shift,
> + rq, per_cpu(thermal_pressure, cpu_of(rq)));
Would be better to create
+static inline u64 rq_clock_thermal(struct rq *rq)
+{
+ lockdep_assert_held(&rq->lock);
+ assert_clock_updated(rq);
+
+ return rq_clock_task(rq) >> sched_thermal_decay_shift;
+}
+
and use it when calling update_thermal_load_avg(rq_clock_thermal(rq)...
> #endif
> }
>
> --
> 2.1.4
>
On 07/11/2019 11:49, Vincent Guittot wrote:
> Le Tuesday 05 Nov 2019 à 13:49:46 (-0500), Thara Gopinath a écrit :
[...]
>> /**
>> @@ -10444,8 +10465,8 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
>> static void trigger_thermal_pressure_average(struct rq *rq)
>> {
>> #ifdef CONFIG_SMP
>> - update_thermal_load_avg(rq_clock_task(rq), rq,
>> - per_cpu(thermal_pressure, cpu_of(rq)));
>> + update_thermal_load_avg(rq_clock_task(rq) >> sched_thermal_decay_shift,
>> + rq, per_cpu(thermal_pressure, cpu_of(rq)));
>
> Would be better to create
>
> +static inline u64 rq_clock_thermal(struct rq *rq)
> +{
> + lockdep_assert_held(&rq->lock);
> + assert_clock_updated(rq);
IMHO, the asserts can be skipped here since they're already done in
rq_clock_task().
> + return rq_clock_task(rq) >> sched_thermal_decay_shift;
> +}
On Wed, Nov 6, 2019 at 12:20 AM Thara Gopinath
<[email protected]> wrote:
>
> Thermal pressure follows pelt signas which means the
> decay period for thermal pressure is the default pelt
> decay period. Depending on soc charecteristics and thermal
> activity, it might be beneficial to decay thermal pressure
> slower, but still in-tune with the pelt signals.
> One way to achieve this is to provide a command line parameter
> to set a decay shift parameter to an integer between 0 and 10.
>
> Signed-off-by: Thara Gopinath <[email protected]>
> ---
>
> v4->v5:
> - Changed _coeff to _shift as per review comments on the list.
>
> Documentation/admin-guide/kernel-parameters.txt | 5 +++++
> kernel/sched/fair.c | 25 +++++++++++++++++++++++--
> 2 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index c82f87c..0b8f55e 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -4281,6 +4281,11 @@
> incurs a small amount of overhead in the scheduler
> but is useful for debugging and performance tuning.
>
> + sched_thermal_decay_shift=
> + [KNL, SMP] Set decay shift for thermal pressure signal.
> + Format: integer betweer 0 and 10
> + Default is 0.
> +
> skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
> xtime_lock contention on larger systems, and/or RCU lock
> contention on all systems with CONFIG_MAXSMP set.
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5f6c371..61a020b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -91,6 +91,18 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
> * and maximum available capacity due to thermal events.
> */
> static DEFINE_PER_CPU(unsigned long, thermal_pressure);
> +/**
> + * By default the decay is the default pelt decay period.
> + * The decay shift can change the decay period in
> + * multiples of 32.
> + * Decay shift Decay period(ms)
> + * 0 32
> + * 1 64
> + * 2 128
> + * 3 256
> + * 4 512
> + */
> +static int sched_thermal_decay_shift;
>
> static void trigger_thermal_pressure_average(struct rq *rq);
>
> @@ -10435,6 +10447,15 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
> delta = arch_scale_cpu_capacity(cpu) - capped_capacity;
> per_cpu(thermal_pressure, cpu) = delta;
> }
> +
> +static int __init setup_sched_thermal_decay_shift(char *str)
> +{
> + if (kstrtoint(str, 0, &sched_thermal_decay_shift))
> + pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n");
You're reading straight from the cmdline into a kernel variable w/o
any bounds checking. Perhaps use clamp or clamp_val to make sure it is
between 0 and 10?
> +
> + return 1;
> +}
> +__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
> #endif
>
> /**
> @@ -10444,8 +10465,8 @@ void update_thermal_pressure(int cpu, unsigned long capped_capacity)
> static void trigger_thermal_pressure_average(struct rq *rq)
> {
> #ifdef CONFIG_SMP
> - update_thermal_load_avg(rq_clock_task(rq), rq,
> - per_cpu(thermal_pressure, cpu_of(rq)));
> + update_thermal_load_avg(rq_clock_task(rq) >> sched_thermal_decay_shift,
> + rq, per_cpu(thermal_pressure, cpu_of(rq)));
> #endif
> }
>
> --
> 2.1.4
>