2020-12-14 20:15:26

by Rafael J. Wysocki

[permalink] [raw]
Subject: [PATCH v2 3/3] cpufreq: intel_pstate: Implement the ->adjust_perf() callback

From: Rafael J. Wysocki <[email protected]>

Make intel_pstate expose the ->adjust_perf() callback when it
operates in the passive mode with HWP enabled, which causes the
schedutil governor to use that callback instead of ->fast_switch().

The minimum and target performance-level values passed by the
governor to ->adjust_perf() are converted to HWP.REQ.MIN and
HWP.REQ.DESIRED, respectively, which allows the processor to
adjust its configuration to maximize energy-efficiency while
providing sufficient capacity.

Signed-off-by: Rafael J. Wysocki <[email protected]>
---

v1 -> v2:
- No changes.

---
drivers/cpufreq/intel_pstate.c | 70 +++++++++++++++++++++++++++++++++--------
1 file changed, 58 insertions(+), 12 deletions(-)

Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -2526,20 +2526,19 @@ static void intel_cpufreq_trace(struct c
fp_toint(cpu->iowait_boost * 100));
}

-static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate,
- bool strict, bool fast_switch)
+static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min, u32 max,
+ u32 desired, bool fast_switch)
{
u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev;

value &= ~HWP_MIN_PERF(~0L);
- value |= HWP_MIN_PERF(target_pstate);
+ value |= HWP_MIN_PERF(min);

- /*
- * The entire MSR needs to be updated in order to update the HWP min
- * field in it, so opportunistically update the max too if needed.
- */
value &= ~HWP_MAX_PERF(~0L);
- value |= HWP_MAX_PERF(strict ? target_pstate : cpu->max_perf_ratio);
+ value |= HWP_MAX_PERF(max);
+
+ value &= ~HWP_DESIRED_PERF(~0L);
+ value |= HWP_DESIRED_PERF(desired);

if (value == prev)
return;
@@ -2569,11 +2568,15 @@ static int intel_cpufreq_update_pstate(s
int old_pstate = cpu->pstate.current_pstate;

target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
- if (hwp_active)
- intel_cpufreq_adjust_hwp(cpu, target_pstate,
- policy->strict_target, fast_switch);
- else if (target_pstate != old_pstate)
+ if (hwp_active) {
+ int max_pstate = policy->strict_target ?
+ target_pstate : cpu->max_perf_ratio;
+
+ intel_cpufreq_adjust_hwp(cpu, target_pstate, max_pstate, 0,
+ fast_switch);
+ } else if (target_pstate != old_pstate) {
intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch);
+ }

cpu->pstate.current_pstate = target_pstate;

@@ -2634,6 +2637,47 @@ static unsigned int intel_cpufreq_fast_s
return target_pstate * cpu->pstate.scaling;
}

+static void intel_cpufreq_adjust_perf(unsigned int cpunum,
+ unsigned long min_perf,
+ unsigned long target_perf,
+ unsigned long capacity)
+{
+ struct cpudata *cpu = all_cpu_data[cpunum];
+ int old_pstate = cpu->pstate.current_pstate;
+ int cap_pstate, min_pstate, max_pstate, target_pstate;
+
+ update_turbo_state();
+ cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate :
+ cpu->pstate.turbo_pstate;
+
+ /* Optimization: Avoid unnecessary divisions. */
+
+ target_pstate = cap_pstate;
+ if (target_perf < capacity)
+ target_pstate = DIV_ROUND_UP(cap_pstate * target_perf, capacity);
+
+ min_pstate = cap_pstate;
+ if (min_perf < capacity)
+ min_pstate = DIV_ROUND_UP(cap_pstate * min_perf, capacity);
+
+ if (min_pstate < cpu->pstate.min_pstate)
+ min_pstate = cpu->pstate.min_pstate;
+
+ if (min_pstate < cpu->min_perf_ratio)
+ min_pstate = cpu->min_perf_ratio;
+
+ max_pstate = min(cap_pstate, cpu->max_perf_ratio);
+ if (max_pstate < min_pstate)
+ max_pstate = min_pstate;
+
+ target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate);
+
+ intel_cpufreq_adjust_hwp(cpu, min_pstate, max_pstate, target_pstate, true);
+
+ cpu->pstate.current_pstate = target_pstate;
+ intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
+}
+
static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
int max_state, turbo_max, min_freq, max_freq, ret;
@@ -3032,6 +3076,8 @@ static int __init intel_pstate_init(void
intel_pstate.attr = hwp_cpufreq_attrs;
intel_cpufreq.attr = hwp_cpufreq_attrs;
intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS;
+ intel_cpufreq.fast_switch = NULL;
+ intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf;
if (!default_driver)
default_driver = &intel_pstate;





2020-12-15 03:36:13

by srinivas pandruvada

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] cpufreq: intel_pstate: Implement the ->adjust_perf() callback

On Mon, 2020-12-14 at 21:09 +0100, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> Make intel_pstate expose the ->adjust_perf() callback when it
> operates in the passive mode with HWP enabled which causes the
> schedutil governor to use that callback instead of ->fast_switch().
>
> The minimum and target performance-level values passed by the
> governor to ->adjust_perf() are converted to HWP.REQ.MIN and
> HWP.REQ.DESIRED, respectively, which allows the processor to
> adjust its configuration to maximize energy-efficiency while
> providing sufficient capacity.
>
> Signed-off-by: Rafael J. Wysocki <[email protected]>
Acked-by: Srinivas Pandruvada <[email protected]>

> ---
>
> v1 -> v2:
>  - No changes.
>
> ---
>  drivers/cpufreq/intel_pstate.c |   70 +++++++++++++++++++++++++++++++++--------
>  1 file changed, 58 insertions(+), 12 deletions(-)
>
> Index: linux-pm/drivers/cpufreq/intel_pstate.c
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/intel_pstate.c
> +++ linux-pm/drivers/cpufreq/intel_pstate.c
> @@ -2526,20 +2526,19 @@ static void intel_cpufreq_trace(struct c
>                 fp_toint(cpu->iowait_boost * 100));
>  }
>  
> -static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32
> target_pstate,
> -                                    bool strict, bool fast_switch)
> +static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min,
> u32 max,
> +                                    u32 desired, bool fast_switch)
>  {
>         u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev;
>  
>         value &= ~HWP_MIN_PERF(~0L);
> -       value |= HWP_MIN_PERF(target_pstate);
> +       value |= HWP_MIN_PERF(min);
>  
> -       /*
> -        * The entire MSR needs to be updated in order to update the
> HWP min
> -        * field in it, so opportunistically update the max too if
> needed.
> -        */
>         value &= ~HWP_MAX_PERF(~0L);
> -       value |= HWP_MAX_PERF(strict ? target_pstate : cpu->max_perf_ratio);
> +       value |= HWP_MAX_PERF(max);
> +
> +       value &= ~HWP_DESIRED_PERF(~0L);
> +       value |= HWP_DESIRED_PERF(desired);
>  
>         if (value == prev)
>                 return;
> @@ -2569,11 +2568,15 @@ static int intel_cpufreq_update_pstate(s
>         int old_pstate = cpu->pstate.current_pstate;
>  
>         target_pstate = intel_pstate_prepare_request(cpu,
> target_pstate);
> -       if (hwp_active)
> -               intel_cpufreq_adjust_hwp(cpu, target_pstate,
> -                                        policy->strict_target,
> fast_switch);
> -       else if (target_pstate != old_pstate)
> +       if (hwp_active) {
> +               int max_pstate = policy->strict_target ?
> +                                       target_pstate : cpu->max_perf_ratio;
> +
> +               intel_cpufreq_adjust_hwp(cpu, target_pstate,
> max_pstate, 0,
> +                                        fast_switch);
> +       } else if (target_pstate != old_pstate) {
>                 intel_cpufreq_adjust_perf_ctl(cpu, target_pstate,
> fast_switch);
> +       }
>  
>         cpu->pstate.current_pstate = target_pstate;
>  
> @@ -2634,6 +2637,47 @@ static unsigned int intel_cpufreq_fast_s
>         return target_pstate * cpu->pstate.scaling;
>  }
>  
> +static void intel_cpufreq_adjust_perf(unsigned int cpunum,
> +                                     unsigned long min_perf,
> +                                     unsigned long target_perf,
> +                                     unsigned long capacity)
> +{
> +       struct cpudata *cpu = all_cpu_data[cpunum];
> +       int old_pstate = cpu->pstate.current_pstate;
> +       int cap_pstate, min_pstate, max_pstate, target_pstate;
> +
> +       update_turbo_state();
> +       cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate :
> +                                            cpu->pstate.turbo_pstate;
> +
> +       /* Optimization: Avoid unnecessary divisions. */
> +
> +       target_pstate = cap_pstate;
> +       if (target_perf < capacity)
> +               target_pstate = DIV_ROUND_UP(cap_pstate *
> target_perf, capacity);
> +
> +       min_pstate = cap_pstate;
> +       if (min_perf < capacity)
> +               min_pstate = DIV_ROUND_UP(cap_pstate * min_perf,
> capacity);
> +
> +       if (min_pstate < cpu->pstate.min_pstate)
> +               min_pstate = cpu->pstate.min_pstate;
> +
> +       if (min_pstate < cpu->min_perf_ratio)
> +               min_pstate = cpu->min_perf_ratio;
> +
> +       max_pstate = min(cap_pstate, cpu->max_perf_ratio);
> +       if (max_pstate < min_pstate)
> +               max_pstate = min_pstate;
> +
> +       target_pstate = clamp_t(int, target_pstate, min_pstate,
> max_pstate);
> +
> +       intel_cpufreq_adjust_hwp(cpu, min_pstate, max_pstate,
> target_pstate, true);
> +
> +       cpu->pstate.current_pstate = target_pstate;
> +       intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH,
> old_pstate);
> +}
> +
>  static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
>         int max_state, turbo_max, min_freq, max_freq, ret;
> @@ -3032,6 +3076,8 @@ static int __init intel_pstate_init(void
>                         intel_pstate.attr = hwp_cpufreq_attrs;
>                         intel_cpufreq.attr = hwp_cpufreq_attrs;
>                         intel_cpufreq.flags |=
> CPUFREQ_NEED_UPDATE_LIMITS;
> +                       intel_cpufreq.fast_switch = NULL;
> +                       intel_cpufreq.adjust_perf =
> intel_cpufreq_adjust_perf;
>                         if (!default_driver)
>                                 default_driver = &intel_pstate;
>  
>
>
>