Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20;
MIME-Version: 1.0
References: <20220415133356.179706384@linutronix.de> <20220415161206.478362457@linutronix.de>
In-Reply-To: <20220415161206.478362457@linutronix.de>
From:   "Rafael J. Wysocki" <rafael@kernel.org>
Date:   Tue, 19 Apr 2022 17:40:31 +0200
Message-ID: <CAJZ5v0h_Zed_0ESv2A3XJ+F3e5qAdqu6gR9xiiBnCF59cN4KCQ@mail.gmail.com>
Subject: Re: [patch 02/10] x86/smp: Move APERF/MPERF code where it belongs
To:     Thomas Gleixner <tglx@linutronix.de>
Cc:     LKML <linux-kernel@vger.kernel.org>,
        "the arch/x86 maintainers" <x86@kernel.org>,
        "Rafael J. Wysocki" <rafael@kernel.org>,
        Linux PM <linux-pm@vger.kernel.org>,
        Eric Dumazet <edumazet@google.com>,
        "Paul E. McKenney" <paulmck@kernel.org>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk

On Fri, Apr 15, 2022 at 9:19 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> as this can share code with the preexisting APERF/MPERF code.
>
> No functional change.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

> ---
>  arch/x86/kernel/cpu/aperfmperf.c |  366 ++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kernel/smpboot.c        |  355 -------------------------------------
>  2 files changed, 362 insertions(+), 359 deletions(-)
>
> --- a/arch/x86/kernel/cpu/aperfmperf.c
> +++ b/arch/x86/kernel/cpu/aperfmperf.c
> @@ -6,15 +6,19 @@
>   * Copyright (C) 2017 Intel Corp.
>   * Author: Len Brown <len.brown@intel.com>
>   */
> -
> +#include <linux/cpufreq.h>
>  #include <linux/delay.h>
>  #include <linux/ktime.h>
>  #include <linux/math64.h>
>  #include <linux/percpu.h>
> -#include <linux/cpufreq.h>
> -#include <linux/smp.h>
> -#include <linux/sched/isolation.h>
>  #include <linux/rcupdate.h>
> +#include <linux/sched/isolation.h>
> +#include <linux/sched/topology.h>
> +#include <linux/smp.h>
> +#include <linux/syscore_ops.h>
> +
> +#include <asm/cpu_device_id.h>
> +#include <asm/intel-family.h>
>
>  #include "cpu.h"
>
> @@ -152,3 +156,357 @@ unsigned int arch_freq_get_on_cpu(int cp
>
>         return per_cpu(samples.khz, cpu);
>  }
> +
> +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
> +/*
> + * APERF/MPERF frequency ratio computation.
> + *
> + * The scheduler wants to do frequency invariant accounting and needs a <1
> + * ratio to account for the 'current' frequency, corresponding to
> + * freq_curr / freq_max.
> + *
> + * Since the frequency freq_curr on x86 is controlled by a micro-controller and
> + * our P-state setting is little more than a request/hint, we need to observe
> + * the effective frequency 'BusyMHz', i.e. the average frequency over a time
> + * interval after discarding idle time. This is given by:
> + *
> + *   BusyMHz = delta_APERF / delta_MPERF * freq_base
> + *
> + * where freq_base is the max non-turbo P-state.
> + *
> + * The freq_max term has to be set to a somewhat arbitrary value, because we
> + * can't know which turbo states will be available at a given point in time:
> + * it all depends on the thermal headroom of the entire package. We set it to
> + * the turbo level with 4 cores active.
> + *
> + * Benchmarks show that's a good compromise between the 1C turbo ratio
> + * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
> + * which would ignore the entire turbo range (a conspicuous part, making
> + * freq_curr/freq_max always maxed out).
> + *
> + * An exception to the heuristic above is the Atom uarch, where we choose the
> + * highest turbo level for freq_max since Atoms are generally oriented towards
> + * power efficiency.
> + *
> + * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
> + * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
> + */
> +
> +DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
> +
> +static DEFINE_PER_CPU(u64, arch_prev_aperf);
> +static DEFINE_PER_CPU(u64, arch_prev_mperf);
> +static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
> +static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
> +
> +void arch_set_max_freq_ratio(bool turbo_disabled)
> +{
> +       arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
> +                                       arch_turbo_freq_ratio;
> +}
> +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
> +
> +static bool turbo_disabled(void)
> +{
> +       u64 misc_en;
> +       int err;
> +
> +       err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
> +       if (err)
> +               return false;
> +
> +       return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
> +}
> +
> +static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> +{
> +       int err;
> +
> +       err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
> +       if (err)
> +               return false;
> +
> +       err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
> +       if (err)
> +               return false;
> +
> +       *base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
> +       *turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */
> +
> +       return true;
> +}
> +
> +#define X86_MATCH(model)                                       \
> +       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,            \
> +               INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
> +
> +static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
> +       X86_MATCH(XEON_PHI_KNL),
> +       X86_MATCH(XEON_PHI_KNM),
> +       {}
> +};
> +
> +static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
> +       X86_MATCH(SKYLAKE_X),
> +       {}
> +};
> +
> +static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
> +       X86_MATCH(ATOM_GOLDMONT),
> +       X86_MATCH(ATOM_GOLDMONT_D),
> +       X86_MATCH(ATOM_GOLDMONT_PLUS),
> +       {}
> +};
> +
> +static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
> +                               int num_delta_fratio)
> +{
> +       int fratio, delta_fratio, found;
> +       int err, i;
> +       u64 msr;
> +
> +       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> +       if (err)
> +               return false;
> +
> +       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
> +
> +       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> +       if (err)
> +               return false;
> +
> +       fratio = (msr >> 8) & 0xFF;
> +       i = 16;
> +       found = 0;
> +       do {
> +               if (found >= num_delta_fratio) {
> +                       *turbo_freq = fratio;
> +                       return true;
> +               }
> +
> +               delta_fratio = (msr >> (i + 5)) & 0x7;
> +
> +               if (delta_fratio) {
> +                       found += 1;
> +                       fratio -= delta_fratio;
> +               }
> +
> +               i += 8;
> +       } while (i < 64);
> +
> +       return true;
> +}
> +
> +static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
> +{
> +       u64 ratios, counts;
> +       u32 group_size;
> +       int err, i;
> +
> +       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> +       if (err)
> +               return false;
> +
> +       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
> +
> +       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
> +       if (err)
> +               return false;
> +
> +       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
> +       if (err)
> +               return false;
> +
> +       for (i = 0; i < 64; i += 8) {
> +               group_size = (counts >> i) & 0xFF;
> +               if (group_size >= size) {
> +                       *turbo_freq = (ratios >> i) & 0xFF;
> +                       return true;
> +               }
> +       }
> +
> +       return false;
> +}
> +
> +static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> +{
> +       u64 msr;
> +       int err;
> +
> +       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> +       if (err)
> +               return false;
> +
> +       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> +       if (err)
> +               return false;
> +
> +       *base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
> +       *turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */
> +
> +       /* The CPU may have less than 4 cores */
> +       if (!*turbo_freq)
> +               *turbo_freq = msr & 0xFF;         /* 1C turbo    */
> +
> +       return true;
> +}
> +
> +static bool intel_set_max_freq_ratio(void)
> +{
> +       u64 base_freq, turbo_freq;
> +       u64 turbo_ratio;
> +
> +       if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
> +               goto out;
> +
> +       if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
> +           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> +               goto out;
> +
> +       if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
> +           knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> +               goto out;
> +
> +       if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
> +           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
> +               goto out;
> +
> +       if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
> +               goto out;
> +
> +       return false;
> +
> +out:
> +       /*
> +        * Some hypervisors advertise X86_FEATURE_APERFMPERF
> +        * but then fill all MSRs with zeroes.
> +        * Some CPUs have turbo boost but don't declare any turbo ratio
> +        * in MSR_TURBO_RATIO_LIMIT.
> +        */
> +       if (!base_freq || !turbo_freq) {
> +               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
> +               return false;
> +       }
> +
> +       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
> +       if (!turbo_ratio) {
> +               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
> +               return false;
> +       }
> +
> +       arch_turbo_freq_ratio = turbo_ratio;
> +       arch_set_max_freq_ratio(turbo_disabled());
> +
> +       return true;
> +}
> +
> +static void init_counter_refs(void)
> +{
> +       u64 aperf, mperf;
> +
> +       rdmsrl(MSR_IA32_APERF, aperf);
> +       rdmsrl(MSR_IA32_MPERF, mperf);
> +
> +       this_cpu_write(arch_prev_aperf, aperf);
> +       this_cpu_write(arch_prev_mperf, mperf);
> +}
> +
> +#ifdef CONFIG_PM_SLEEP
> +static struct syscore_ops freq_invariance_syscore_ops = {
> +       .resume = init_counter_refs,
> +};
> +
> +static void register_freq_invariance_syscore_ops(void)
> +{
> +       /* Bail out if registered already. */
> +       if (freq_invariance_syscore_ops.node.prev)
> +               return;
> +
> +       register_syscore_ops(&freq_invariance_syscore_ops);
> +}
> +#else
> +static inline void register_freq_invariance_syscore_ops(void) {}
> +#endif
> +
> +void init_freq_invariance(bool secondary, bool cppc_ready)
> +{
> +       bool ret = false;
> +
> +       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
> +               return;
> +
> +       if (secondary) {
> +               if (static_branch_likely(&arch_scale_freq_key)) {
> +                       init_counter_refs();
> +               }
> +               return;
> +       }
> +
> +       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
> +               ret = intel_set_max_freq_ratio();
> +       else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> +               if (!cppc_ready) {
> +                       return;
> +               }
> +               ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
> +       }
> +
> +       if (ret) {
> +               init_counter_refs();
> +               static_branch_enable(&arch_scale_freq_key);
> +               register_freq_invariance_syscore_ops();
> +               pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
> +       } else {
> +               pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
> +       }
> +}
> +
> +static void disable_freq_invariance_workfn(struct work_struct *work)
> +{
> +       static_branch_disable(&arch_scale_freq_key);
> +}
> +
> +static DECLARE_WORK(disable_freq_invariance_work,
> +                   disable_freq_invariance_workfn);
> +
> +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
> +
> +void arch_scale_freq_tick(void)
> +{
> +       u64 freq_scale;
> +       u64 aperf, mperf;
> +       u64 acnt, mcnt;
> +
> +       if (!arch_scale_freq_invariant())
> +               return;
> +
> +       rdmsrl(MSR_IA32_APERF, aperf);
> +       rdmsrl(MSR_IA32_MPERF, mperf);
> +
> +       acnt = aperf - this_cpu_read(arch_prev_aperf);
> +       mcnt = mperf - this_cpu_read(arch_prev_mperf);
> +
> +       this_cpu_write(arch_prev_aperf, aperf);
> +       this_cpu_write(arch_prev_mperf, mperf);
> +
> +       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
> +               goto error;
> +
> +       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
> +               goto error;
> +
> +       freq_scale = div64_u64(acnt, mcnt);
> +       if (!freq_scale)
> +               goto error;
> +
> +       if (freq_scale > SCHED_CAPACITY_SCALE)
> +               freq_scale = SCHED_CAPACITY_SCALE;
> +
> +       this_cpu_write(arch_freq_scale, freq_scale);
> +       return;
> +
> +error:
> +       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
> +       schedule_work(&disable_freq_invariance_work);
> +}
> +#endif /* CONFIG_X86_64 && CONFIG_SMP */
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -56,7 +56,6 @@
>  #include <linux/numa.h>
>  #include <linux/pgtable.h>
>  #include <linux/overflow.h>
> -#include <linux/syscore_ops.h>
>
>  #include <asm/acpi.h>
>  #include <asm/desc.h>
> @@ -1847,357 +1846,3 @@ void native_play_dead(void)
>  }
>
>  #endif
> -
> -#ifdef CONFIG_X86_64
> -/*
> - * APERF/MPERF frequency ratio computation.
> - *
> - * The scheduler wants to do frequency invariant accounting and needs a <1
> - * ratio to account for the 'current' frequency, corresponding to
> - * freq_curr / freq_max.
> - *
> - * Since the frequency freq_curr on x86 is controlled by micro-controller and
> - * our P-state setting is little more than a request/hint, we need to observe
> - * the effective frequency 'BusyMHz', i.e. the average frequency over a time
> - * interval after discarding idle time. This is given by:
> - *
> - *   BusyMHz = delta_APERF / delta_MPERF * freq_base
> - *
> - * where freq_base is the max non-turbo P-state.
> - *
> - * The freq_max term has to be set to a somewhat arbitrary value, because we
> - * can't know which turbo states will be available at a given point in time:
> - * it all depends on the thermal headroom of the entire package. We set it to
> - * the turbo level with 4 cores active.
> - *
> - * Benchmarks show that's a good compromise between the 1C turbo ratio
> - * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
> - * which would ignore the entire turbo range (a conspicuous part, making
> - * freq_curr/freq_max always maxed out).
> - *
> - * An exception to the heuristic above is the Atom uarch, where we choose the
> - * highest turbo level for freq_max since Atom's are generally oriented towards
> - * power efficiency.
> - *
> - * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
> - * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
> - */
> -
> -DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
> -
> -static DEFINE_PER_CPU(u64, arch_prev_aperf);
> -static DEFINE_PER_CPU(u64, arch_prev_mperf);
> -static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
> -static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
> -
> -void arch_set_max_freq_ratio(bool turbo_disabled)
> -{
> -       arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
> -                                       arch_turbo_freq_ratio;
> -}
> -EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
> -
> -static bool turbo_disabled(void)
> -{
> -       u64 misc_en;
> -       int err;
> -
> -       err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
> -       if (err)
> -               return false;
> -
> -       return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
> -}
> -
> -static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> -{
> -       int err;
> -
> -       err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
> -       if (err)
> -               return false;
> -
> -       err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
> -       if (err)
> -               return false;
> -
> -       *base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
> -       *turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */
> -
> -       return true;
> -}
> -
> -#define X86_MATCH(model)                                       \
> -       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,            \
> -               INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
> -
> -static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
> -       X86_MATCH(XEON_PHI_KNL),
> -       X86_MATCH(XEON_PHI_KNM),
> -       {}
> -};
> -
> -static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
> -       X86_MATCH(SKYLAKE_X),
> -       {}
> -};
> -
> -static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
> -       X86_MATCH(ATOM_GOLDMONT),
> -       X86_MATCH(ATOM_GOLDMONT_D),
> -       X86_MATCH(ATOM_GOLDMONT_PLUS),
> -       {}
> -};
> -
> -static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
> -                               int num_delta_fratio)
> -{
> -       int fratio, delta_fratio, found;
> -       int err, i;
> -       u64 msr;
> -
> -       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> -       if (err)
> -               return false;
> -
> -       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
> -
> -       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> -       if (err)
> -               return false;
> -
> -       fratio = (msr >> 8) & 0xFF;
> -       i = 16;
> -       found = 0;
> -       do {
> -               if (found >= num_delta_fratio) {
> -                       *turbo_freq = fratio;
> -                       return true;
> -               }
> -
> -               delta_fratio = (msr >> (i + 5)) & 0x7;
> -
> -               if (delta_fratio) {
> -                       found += 1;
> -                       fratio -= delta_fratio;
> -               }
> -
> -               i += 8;
> -       } while (i < 64);
> -
> -       return true;
> -}
> -
> -static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
> -{
> -       u64 ratios, counts;
> -       u32 group_size;
> -       int err, i;
> -
> -       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> -       if (err)
> -               return false;
> -
> -       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
> -
> -       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
> -       if (err)
> -               return false;
> -
> -       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
> -       if (err)
> -               return false;
> -
> -       for (i = 0; i < 64; i += 8) {
> -               group_size = (counts >> i) & 0xFF;
> -               if (group_size >= size) {
> -                       *turbo_freq = (ratios >> i) & 0xFF;
> -                       return true;
> -               }
> -       }
> -
> -       return false;
> -}
> -
> -static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> -{
> -       u64 msr;
> -       int err;
> -
> -       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> -       if (err)
> -               return false;
> -
> -       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> -       if (err)
> -               return false;
> -
> -       *base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
> -       *turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */
> -
> -       /* The CPU may have less than 4 cores */
> -       if (!*turbo_freq)
> -               *turbo_freq = msr & 0xFF;         /* 1C turbo    */
> -
> -       return true;
> -}
> -
> -static bool intel_set_max_freq_ratio(void)
> -{
> -       u64 base_freq, turbo_freq;
> -       u64 turbo_ratio;
> -
> -       if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
> -               goto out;
> -
> -       if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
> -           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> -               goto out;
> -
> -       if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
> -           knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> -               goto out;
> -
> -       if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
> -           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
> -               goto out;
> -
> -       if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
> -               goto out;
> -
> -       return false;
> -
> -out:
> -       /*
> -        * Some hypervisors advertise X86_FEATURE_APERFMPERF
> -        * but then fill all MSR's with zeroes.
> -        * Some CPUs have turbo boost but don't declare any turbo ratio
> -        * in MSR_TURBO_RATIO_LIMIT.
> -        */
> -       if (!base_freq || !turbo_freq) {
> -               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
> -               return false;
> -       }
> -
> -       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
> -       if (!turbo_ratio) {
> -               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
> -               return false;
> -       }
> -
> -       arch_turbo_freq_ratio = turbo_ratio;
> -       arch_set_max_freq_ratio(turbo_disabled());
> -
> -       return true;
> -}
> -
> -static void init_counter_refs(void)
> -{
> -       u64 aperf, mperf;
> -
> -       rdmsrl(MSR_IA32_APERF, aperf);
> -       rdmsrl(MSR_IA32_MPERF, mperf);
> -
> -       this_cpu_write(arch_prev_aperf, aperf);
> -       this_cpu_write(arch_prev_mperf, mperf);
> -}
> -
> -#ifdef CONFIG_PM_SLEEP
> -static struct syscore_ops freq_invariance_syscore_ops = {
> -       .resume = init_counter_refs,
> -};
> -
> -static void register_freq_invariance_syscore_ops(void)
> -{
> -       /* Bail out if registered already. */
> -       if (freq_invariance_syscore_ops.node.prev)
> -               return;
> -
> -       register_syscore_ops(&freq_invariance_syscore_ops);
> -}
> -#else
> -static inline void register_freq_invariance_syscore_ops(void) {}
> -#endif
> -
> -void init_freq_invariance(bool secondary, bool cppc_ready)
> -{
> -       bool ret = false;
> -
> -       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
> -               return;
> -
> -       if (secondary) {
> -               if (static_branch_likely(&arch_scale_freq_key)) {
> -                       init_counter_refs();
> -               }
> -               return;
> -       }
> -
> -       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
> -               ret = intel_set_max_freq_ratio();
> -       else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> -               if (!cppc_ready) {
> -                       return;
> -               }
> -               ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
> -       }
> -
> -       if (ret) {
> -               init_counter_refs();
> -               static_branch_enable(&arch_scale_freq_key);
> -               register_freq_invariance_syscore_ops();
> -               pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
> -       } else {
> -               pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
> -       }
> -}
> -
> -static void disable_freq_invariance_workfn(struct work_struct *work)
> -{
> -       static_branch_disable(&arch_scale_freq_key);
> -}
> -
> -static DECLARE_WORK(disable_freq_invariance_work,
> -                   disable_freq_invariance_workfn);
> -
> -DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
> -
> -void arch_scale_freq_tick(void)
> -{
> -       u64 freq_scale;
> -       u64 aperf, mperf;
> -       u64 acnt, mcnt;
> -
> -       if (!arch_scale_freq_invariant())
> -               return;
> -
> -       rdmsrl(MSR_IA32_APERF, aperf);
> -       rdmsrl(MSR_IA32_MPERF, mperf);
> -
> -       acnt = aperf - this_cpu_read(arch_prev_aperf);
> -       mcnt = mperf - this_cpu_read(arch_prev_mperf);
> -
> -       this_cpu_write(arch_prev_aperf, aperf);
> -       this_cpu_write(arch_prev_mperf, mperf);
> -
> -       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
> -               goto error;
> -
> -       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
> -               goto error;
> -
> -       freq_scale = div64_u64(acnt, mcnt);
> -       if (!freq_scale)
> -               goto error;
> -
> -       if (freq_scale > SCHED_CAPACITY_SCALE)
> -               freq_scale = SCHED_CAPACITY_SCALE;
> -
> -       this_cpu_write(arch_freq_scale, freq_scale);
> -       return;
> -
> -error:
> -       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
> -       schedule_work(&disable_freq_invariance_work);
> -}
> -#endif /* CONFIG_X86_64 */
>