2023-10-25 09:33:55

by Zeng Heng

Subject: [PATCH 0/3] Make the cpuinfo_cur_freq interface read correctly

Patches 1~2: ensure that the AMU can continue to work normally during the
sample period while cstate is enabled.

Patch 3: even in MMIO mode, cpc_read() still has random delay errors, so
extend the sampling time.

Zeng Heng (3):
arm64: cpufeature: Export cpu_has_amu_feat()
cpufreq: CPPC: Keep the target core awake when reading its cpufreq
rate
cpufreq: CPPC: Eliminate the impact of cpc_read() latency error

arch/arm64/kernel/cpufeature.c | 1 +
drivers/cpufreq/cppc_cpufreq.c | 53 ++++++++++++++++++++++++++++------
2 files changed, 45 insertions(+), 9 deletions(-)

--
2.25.1


2023-10-25 09:34:01

by Zeng Heng

Subject: [PATCH 1/3] arm64: cpufeature: Export cpu_has_amu_feat()

Export the cpu_has_amu_feat() function for use by cppc_cpufreq.c to check
whether the processor implements ARM's Activity Monitor Unit (AMU).

Signed-off-by: Zeng Heng <[email protected]>
---
arch/arm64/kernel/cpufeature.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 444a73c2e638..47195e66a820 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1935,6 +1935,7 @@ bool cpu_has_amu_feat(int cpu)
{
return cpumask_test_cpu(cpu, &amu_cpus);
}
+EXPORT_SYMBOL(cpu_has_amu_feat);

int get_cpu_with_amu_feat(void)
{
--
2.25.1

2023-10-25 09:34:09

by Zeng Heng

Subject: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate

As the Arm AMU documentation says, all counters are subject to any changes
in clock frequency, including clock stopping caused by the WFI and WFE
instructions.

Therefore, use smp_call_on_cpu() to make the target CPU read its own AMU
counters, which ensures the counters keep working properly while the
cstate feature is enabled.

Reported-by: Sumit Gupta <[email protected]>
Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Zeng Heng <[email protected]>
---
drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index fe08ca419b3d..321a9dc9484d 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -90,6 +90,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
struct cppc_perf_fb_ctrs *fb_ctrs_t0,
struct cppc_perf_fb_ctrs *fb_ctrs_t1);

+struct fb_ctr_pair {
+ u32 cpu;
+ struct cppc_perf_fb_ctrs fb_ctrs_t0;
+ struct cppc_perf_fb_ctrs fb_ctrs_t1;
+};
+
/**
* cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
* @work: The work item.
@@ -840,9 +846,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
return (reference_perf * delta_delivered) / delta_reference;
}

+static int cppc_get_perf_ctrs_pair(void *val)
+{
+ struct fb_ctr_pair *fb_ctrs = val;
+ int cpu = fb_ctrs->cpu;
+ int ret;
+
+ ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
+ if (ret)
+ return ret;
+
+ udelay(2); /* 2usec delay between sampling */
+
+ return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
+}
+
static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
{
- struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
+ struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct cppc_cpudata *cpu_data = policy->driver_data;
u64 delivered_perf;
@@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)

cpufreq_cpu_put(policy);

- ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
- if (ret)
- return 0;
-
- udelay(2); /* 2usec delay between sampling */
+ if (cpu_has_amu_feat(cpu))
+ ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
+ &fb_ctrs, false);
+ else
+ ret = cppc_get_perf_ctrs_pair(&fb_ctrs);

- ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
if (ret)
return 0;

- delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
- &fb_ctrs_t1);
+ delivered_perf = cppc_perf_from_fbctrs(cpu_data,
+ &fb_ctrs.fb_ctrs_t0,
+ &fb_ctrs.fb_ctrs_t1);

return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
}
--
2.25.1

2023-10-25 09:34:31

by Zeng Heng

Subject: [PATCH 3/3] cpufreq: CPPC: Eliminate the impact of cpc_read() latency error

We have found significant differences in the latency of cpc_read() between
regular scenarios and scenarios with high memory access pressure. Ignoring
this error can result in the get-rate interface occasionally returning
absurd values.

Here is a high memory access pressure sample test using stress-ng. My local
testing platform has 160 CPUs, the CPC registers are accessed via MMIO, and
the cpuidle feature is disabled (so the AMU always stays online):

~~~
./stress-ng --memrate 160 --timeout 180
~~~

The following data comes from ftrace (function_graph) statistics for
cppc_get_perf_ctrs():

Regular scenarios || High memory access pressure scenarios
104) | cppc_get_perf_ctrs() { || 133) | cppc_get_perf_ctrs() {
104) 0.800 us | cpc_read.isra.0(); || 133) 4.580 us | cpc_read.isra.0();
104) 0.640 us | cpc_read.isra.0(); || 133) 7.780 us | cpc_read.isra.0();
104) 0.450 us | cpc_read.isra.0(); || 133) 2.550 us | cpc_read.isra.0();
104) 0.430 us | cpc_read.isra.0(); || 133) 0.570 us | cpc_read.isra.0();
104) 4.610 us | } || 133) ! 157.610 us | }
104) | cppc_get_perf_ctrs() { || 133) | cppc_get_perf_ctrs() {
104) 0.720 us | cpc_read.isra.0(); || 133) 0.760 us | cpc_read.isra.0();
104) 0.720 us | cpc_read.isra.0(); || 133) 4.480 us | cpc_read.isra.0();
104) 0.510 us | cpc_read.isra.0(); || 133) 0.520 us | cpc_read.isra.0();
104) 0.500 us | cpc_read.isra.0(); || 133) + 10.100 us | cpc_read.isra.0();
104) 3.460 us | } || 133) ! 120.850 us | }
108) | cppc_get_perf_ctrs() { || 87) | cppc_get_perf_ctrs() {
108) 0.820 us | cpc_read.isra.0(); || 87) ! 255.200 us | cpc_read.isra.0();
108) 0.850 us | cpc_read.isra.0(); || 87) 2.910 us | cpc_read.isra.0();
108) 0.590 us | cpc_read.isra.0(); || 87) 5.160 us | cpc_read.isra.0();
108) 0.610 us | cpc_read.isra.0(); || 87) 4.340 us | cpc_read.isra.0();
108) 5.080 us | } || 87) ! 315.790 us | }
108) | cppc_get_perf_ctrs() { || 87) | cppc_get_perf_ctrs() {
108) 0.630 us | cpc_read.isra.0(); || 87) 0.800 us | cpc_read.isra.0();
108) 0.630 us | cpc_read.isra.0(); || 87) 6.310 us | cpc_read.isra.0();
108) 0.420 us | cpc_read.isra.0(); || 87) 1.190 us | cpc_read.isra.0();
108) 0.430 us | cpc_read.isra.0(); || 87) + 11.620 us | cpc_read.isra.0();
108) 3.780 us | } || 87) ! 207.010 us | }

My local testing platform runs at 3000000 kHz (3 GHz), but the
cpuinfo_cur_freq interface returns values that are not even close to the
actual frequency:

[root@localhost ~]# cd /sys/devices/system/cpu
[root@localhost cpu]# for i in {0..159}; do cat cpu$i/cpufreq/cpuinfo_cur_freq; done
5127812
2952127
3069001
3496183
922989768
2419194
3427042
2331869
3594611
8238499
...

The reason is that under heavy memory access pressure, the execution delay
of cpc_read() increases from sub-microsecond to several hundred
microseconds. Moving the cpc_read() calls into a critical section with
interrupts disabled has minimal impact on the result.
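
Roughly, that experiment looked like the sketch below (illustrative only,
assuming the MMIO-backed CPC registers of this platform; it is not part of
the patch):

~~~
unsigned long flags;
int ret;

local_irq_save(flags);
ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
udelay(2);                      /* same 2usec sampling gap as before */
if (!ret)
        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
local_irq_restore(flags);

/*
 * The absurd readings persist: the extra latency sits inside cpc_read()
 * itself, so shielding the sampling window from interrupts does not help.
 */
~~~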

cppc_get_perf_ctrs()[0] cppc_get_perf_ctrs()[1]
/ \ / \
cpc_read cpc_read cpc_read cpc_read
ref[0] delivered[0] ref[1] delivered[1]
| | | |
v v v v
-----------------------------------------------------------------------> time
<--delta[0]--> <------sample_period------> <-----delta[1]----->

Given the above,
freq = ref_freq * (delivered[1] - delivered[0]) / (ref[1] - ref[0])
and
delivered[1] - delivered[0] = freq * (delta[1] + sample_period),
ref[1] - ref[0] = ref_freq * (delta[0] + sample_period)
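
Making the substitution explicit (freq_reported below just names what the
first formula computes, while freq is the actual frequency):

freq_reported = freq * (sample_period + delta[1]) / (sample_period + delta[0])

With a 2us sample_period, a delta[] of several hundred microseconds on
either read dominates the ratio and skews the reported frequency by a
large factor.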

To eliminate the impact of system memory access latency, a sampling period
of 2us is far from sufficient. Consequently, we suggest that
cppc_cpufreq_get_rate() only be called in process context, and adopt a
longer sampling period to neutralize the impact of random latency.

Here we call cond_resched() instead of sleep-like functions, so that
`taskset -c $i cat cpu$i/cpufreq/cpuinfo_cur_freq` still works when the
cpuidle feature is enabled.

Reported-by: Yang Shi <[email protected]>
Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Zeng Heng <[email protected]>
---
drivers/cpufreq/cppc_cpufreq.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 321a9dc9484d..a7c5418bcda7 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -851,12 +851,26 @@ static int cppc_get_perf_ctrs_pair(void *val)
struct fb_ctr_pair *fb_ctrs = val;
int cpu = fb_ctrs->cpu;
int ret;
+ unsigned long timeout;

ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
if (ret)
return ret;

- udelay(2); /* 2usec delay between sampling */
+ if (likely(!irqs_disabled())) {
+ /*
+ * Set 1ms as sampling interval, but never schedule
+ * to the idle task to prevent the AMU counters from
+ * stopping working.
+ */
+ timeout = jiffies + msecs_to_jiffies(1);
+ while (!time_after(jiffies, timeout))
+ cond_resched();
+
+ } else {
+ pr_warn_once("CPU%d: Get rate in atomic context", cpu);
+ udelay(2); /* 2usec delay between sampling */
+ }

return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
}
--
2.25.1

2023-10-25 10:54:49

by Mark Rutland

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate

[adding Ionela]

On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
> As ARM AMU's document says, all counters are subject to any changes
> in clock frequency, including clock stopping caused by the WFI and WFE
> instructions.
>
> Therefore, using smp_call_on_cpu() to trigger target CPU to
> read self's AMU counters, which ensures the counters are working
> properly while cstate feature is enabled.

IIUC there's a pretty deliberate split with all the actual reading of the AMU
living in arch/arm64/kernel/topology.c, and the driver code being (relatively)
generic.

We already have code in arch/arm64/kernel/topology.c to read counters on a
specific CPU; why can't we reuse that (and avoid exporting cpu_has_amu_feat())?

Mark.

>
> Reported-by: Sumit Gupta <[email protected]>
> Link: https://lore.kernel.org/all/[email protected]/
> Signed-off-by: Zeng Heng <[email protected]>
> ---
> drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
> 1 file changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index fe08ca419b3d..321a9dc9484d 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -90,6 +90,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> struct cppc_perf_fb_ctrs *fb_ctrs_t0,
> struct cppc_perf_fb_ctrs *fb_ctrs_t1);
>
> +struct fb_ctr_pair {
> + u32 cpu;
> + struct cppc_perf_fb_ctrs fb_ctrs_t0;
> + struct cppc_perf_fb_ctrs fb_ctrs_t1;
> +};
> +
> /**
> * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
> * @work: The work item.
> @@ -840,9 +846,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> return (reference_perf * delta_delivered) / delta_reference;
> }
>
> +static int cppc_get_perf_ctrs_pair(void *val)
> +{
> + struct fb_ctr_pair *fb_ctrs = val;
> + int cpu = fb_ctrs->cpu;
> + int ret;
> +
> + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
> + if (ret)
> + return ret;
> +
> + udelay(2); /* 2usec delay between sampling */
> +
> + return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
> +}
> +
> static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> {
> - struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
> + struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
> struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> struct cppc_cpudata *cpu_data = policy->driver_data;
> u64 delivered_perf;
> @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>
> cpufreq_cpu_put(policy);
>
> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
> - if (ret)
> - return 0;
> -
> - udelay(2); /* 2usec delay between sampling */
> + if (cpu_has_amu_feat(cpu))
> + ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
> + &fb_ctrs, false);
> + else
> + ret = cppc_get_perf_ctrs_pair(&fb_ctrs);
>
> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
> if (ret)
> return 0;
>
> - delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
> - &fb_ctrs_t1);
> + delivered_perf = cppc_perf_from_fbctrs(cpu_data,
> + &fb_ctrs.fb_ctrs_t0,
> + &fb_ctrs.fb_ctrs_t1);
>
> return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
> }
> --
> 2.25.1
>

2023-10-25 11:01:47

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 3/3] cpufreq: CPPC: Eliminate the impact of cpc_read() latency error

On Wed, Oct 25, 2023 at 05:38:47PM +0800, Zeng Heng wrote:
> We have found significant differences in the latency of cpc_read() between
> regular scenarios and scenarios with high memory access pressure. Ignoring
> this error can result in getting rate interface occasionally returning
> absurd values.
>
> Here provides a high memory access sample test by stress-ng. My local
> testing platform includes 160 CPUs, the CPC registers is accessed by mmio
> method, and the cpuidle feature is disabled (the AMU always works online):
>
> ~~~
> ./stress-ng --memrate 160 --timeout 180
> ~~~
>
> The following data is sourced from ftrace statistics towards
> cppc_get_perf_ctrs():
>
> Regular scenarios || High memory access pressure scenarios
> 104) | cppc_get_perf_ctrs() { || 133) | cppc_get_perf_ctrs() {
> 104) 0.800 us | cpc_read.isra.0(); || 133) 4.580 us | cpc_read.isra.0();
> 104) 0.640 us | cpc_read.isra.0(); || 133) 7.780 us | cpc_read.isra.0();
> 104) 0.450 us | cpc_read.isra.0(); || 133) 2.550 us | cpc_read.isra.0();
> 104) 0.430 us | cpc_read.isra.0(); || 133) 0.570 us | cpc_read.isra.0();
> 104) 4.610 us | } || 133) ! 157.610 us | }
> 104) | cppc_get_perf_ctrs() { || 133) | cppc_get_perf_ctrs() {
> 104) 0.720 us | cpc_read.isra.0(); || 133) 0.760 us | cpc_read.isra.0();
> 104) 0.720 us | cpc_read.isra.0(); || 133) 4.480 us | cpc_read.isra.0();
> 104) 0.510 us | cpc_read.isra.0(); || 133) 0.520 us | cpc_read.isra.0();
> 104) 0.500 us | cpc_read.isra.0(); || 133) + 10.100 us | cpc_read.isra.0();
> 104) 3.460 us | } || 133) ! 120.850 us | }
> 108) | cppc_get_perf_ctrs() { || 87) | cppc_get_perf_ctrs() {
> 108) 0.820 us | cpc_read.isra.0(); || 87) ! 255.200 us | cpc_read.isra.0();
> 108) 0.850 us | cpc_read.isra.0(); || 87) 2.910 us | cpc_read.isra.0();
> 108) 0.590 us | cpc_read.isra.0(); || 87) 5.160 us | cpc_read.isra.0();
> 108) 0.610 us | cpc_read.isra.0(); || 87) 4.340 us | cpc_read.isra.0();
> 108) 5.080 us | } || 87) ! 315.790 us | }
> 108) | cppc_get_perf_ctrs() { || 87) | cppc_get_perf_ctrs() {
> 108) 0.630 us | cpc_read.isra.0(); || 87) 0.800 us | cpc_read.isra.0();
> 108) 0.630 us | cpc_read.isra.0(); || 87) 6.310 us | cpc_read.isra.0();
> 108) 0.420 us | cpc_read.isra.0(); || 87) 1.190 us | cpc_read.isra.0();
> 108) 0.430 us | cpc_read.isra.0(); || 87) + 11.620 us | cpc_read.isra.0();
> 108) 3.780 us | } || 87) ! 207.010 us | }
>
> My local testing platform works under 3000000hz, but the cpuinfo_cur_freq
> interface returns values that are not even close to the actual frequency:
>
> [root@localhost ~]# cd /sys/devices/system/cpu
> [root@localhost cpu]# for i in {0..159}; do cat cpu$i/cpufreq/cpuinfo_cur_freq; done
> 5127812
> 2952127
> 3069001
> 3496183
> 922989768
> 2419194
> 3427042
> 2331869
> 3594611
> 8238499
> ...
>
> The reason is when under heavy memory access pressure, the execution of
> cpc_read() delay has increased from sub-microsecond to several hundred
> microseconds. Moving the cpc_read function into a critical section by irq
> disable/enable has minimal impact on the result.
>
> cppc_get_perf_ctrs()[0] cppc_get_perf_ctrs()[1]
> / \ / \
> cpc_read cpc_read cpc_read cpc_read
> ref[0] delivered[0] ref[1] delivered[1]
> | | | |
> v v v v
> -----------------------------------------------------------------------> time
> <--delta[0]--> <------sample_period------> <-----delta[1]----->
>
> Since that,
> freq = ref_freq * (delivered[1] - delivered[0]) / (ref[1] - ref[0])
> and
> delivered[1] - delivered[0] = freq * (delta[1] + sample_period),
> ref[1] - ref[0] = ref_freq * (delta[0] + sample_period)
>
> To eliminate the impact of system memory access latency, setting a
> sampling period of 2us is far from sufficient. Consequently, we suggest
> cppc_cpufreq_get_rate() only can be called in the process context, and
> adopt a longer sampling period to neutralize the impact of random latency.
>
> Here we call the cond_resched() function instead of sleep-like functions
> to ensure that `taskset -c $i cat cpu$i/cpufreq/cpuinfo_cur_freq` could
> work when cpuidle feature is enabled.
>
> Reported-by: Yang Shi <[email protected]>
> Link: https://lore.kernel.org/all/[email protected]/
> Signed-off-by: Zeng Heng <[email protected]>
> ---
> drivers/cpufreq/cppc_cpufreq.c | 16 +++++++++++++++-
> 1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index 321a9dc9484d..a7c5418bcda7 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -851,12 +851,26 @@ static int cppc_get_perf_ctrs_pair(void *val)

The previous patch added this function, and calls it with smp_call_on_cpu(),
where it'll run in IRQ context with IRQs disabled...

> struct fb_ctr_pair *fb_ctrs = val;
> int cpu = fb_ctrs->cpu;
> int ret;
> + unsigned long timeout;
>
> ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
> if (ret)
> return ret;
>
> - udelay(2); /* 2usec delay between sampling */
> + if (likely(!irqs_disabled())) {
> + /*
> + * Set 1ms as sampling interval, but never schedule
> + * to the idle task to prevent the AMU counters from
> + * stopping working.
> + */
> + timeout = jiffies + msecs_to_jiffies(1);
> + while (!time_after(jiffies, timeout))
> + cond_resched();
> +
> + } else {

... so we'll enter this branch of the if-else ...

> + pr_warn_once("CPU%d: Get rate in atomic context", cpu);

... and pr_warn_once() for something that's apparently normal and outside of
the user's control?

That doesn't make much sense to me.

Mark.

> + udelay(2); /* 2usec delay between sampling */
> + }
>
> return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
> }
> --
> 2.25.1
>

2023-10-25 11:14:50

by Sudeep Holla

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate

On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
> As ARM AMU's document says, all counters are subject to any changes
> in clock frequency, including clock stopping caused by the WFI and WFE
> instructions.
>
> Therefore, using smp_call_on_cpu() to trigger target CPU to
> read self's AMU counters, which ensures the counters are working
> properly while cstate feature is enabled.
>
> Reported-by: Sumit Gupta <[email protected]>
> Link: https://lore.kernel.org/all/[email protected]/
> Signed-off-by: Zeng Heng <[email protected]>
> ---
> drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
> 1 file changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index fe08ca419b3d..321a9dc9484d 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c

[...]

> @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>
> cpufreq_cpu_put(policy);
>
> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
> - if (ret)
> - return 0;
> -
> - udelay(2); /* 2usec delay between sampling */
> + if (cpu_has_amu_feat(cpu))

Have you compiled this on x86 ? Even if you have somehow managed to,
this is not the right place to check the presence of AMU feature on
the CPU.

If AMU registers are used in CPPC, they must be using FFH GAS, in which
case the interpretation of FFH is architecture dependent code.

--
Regards,
Sudeep

2023-10-25 14:58:24

by Sumit Gupta

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate




> [adding Ionela]
>
> On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
>> As ARM AMU's document says, all counters are subject to any changes
>> in clock frequency, including clock stopping caused by the WFI and WFE
>> instructions.
>>
>> Therefore, using smp_call_on_cpu() to trigger target CPU to
>> read self's AMU counters, which ensures the counters are working
>> properly while cstate feature is enabled.
>
> IIUC there's a pretty deliberate split with all the actual reading of the AMU
> living in arch/arm64/kernel/topology.c, and the driver code being (relatively)
> generic.
>
> We already have code in arch/arm64/kernel/topology.c to read counters on a
> specific CPU; why can't we reuse that (and avoid exporting cpu_has_amu_feat())?


This patch seems mostly based on my previous patch [1] and was already
discussed in [2]. Beata [CCed] shared an alternate approach [3] that
leverages existing code from 'topology.c' to get the average frequency
over the last tick period.


Beata,

Could you share v2 of [3] with the request to merge? We can try to solve
the issue with the CPU idle case later, on top.

Additionally, please also include the fix in [4] if it looks fine.

Best Regards,
Sumit Gupta

[1] https://lore.kernel.org/all/[email protected]/
[2]
https://lore.kernel.org/lkml/[email protected]/T/#m2174305de4706006e0bd9c103a0e5ff61cea7a12
[3]
https://lore.kernel.org/lkml/[email protected]/
[4]
https://lore.kernel.org/lkml/[email protected]/


>>
>> Reported-by: Sumit Gupta <[email protected]>
>> Link: https://lore.kernel.org/all/[email protected]/
>> Signed-off-by: Zeng Heng <[email protected]>
>> ---
>> drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
>> 1 file changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
>> index fe08ca419b3d..321a9dc9484d 100644
>> --- a/drivers/cpufreq/cppc_cpufreq.c
>> +++ b/drivers/cpufreq/cppc_cpufreq.c
>> @@ -90,6 +90,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
>> struct cppc_perf_fb_ctrs *fb_ctrs_t0,
>> struct cppc_perf_fb_ctrs *fb_ctrs_t1);
>>
>> +struct fb_ctr_pair {
>> + u32 cpu;
>> + struct cppc_perf_fb_ctrs fb_ctrs_t0;
>> + struct cppc_perf_fb_ctrs fb_ctrs_t1;
>> +};
>> +
>> /**
>> * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
>> * @work: The work item.
>> @@ -840,9 +846,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
>> return (reference_perf * delta_delivered) / delta_reference;
>> }
>>
>> +static int cppc_get_perf_ctrs_pair(void *val)
>> +{
>> + struct fb_ctr_pair *fb_ctrs = val;
>> + int cpu = fb_ctrs->cpu;
>> + int ret;
>> +
>> + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
>> + if (ret)
>> + return ret;
>> +
>> + udelay(2); /* 2usec delay between sampling */
>> +
>> + return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
>> +}
>> +
>> static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>> {
>> - struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
>> + struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
>> struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>> struct cppc_cpudata *cpu_data = policy->driver_data;
>> u64 delivered_perf;
>> @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>>
>> cpufreq_cpu_put(policy);
>>
>> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>> - if (ret)
>> - return 0;
>> -
>> - udelay(2); /* 2usec delay between sampling */
>> + if (cpu_has_amu_feat(cpu))
>> + ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
>> + &fb_ctrs, false);
>> + else
>> + ret = cppc_get_perf_ctrs_pair(&fb_ctrs);
>>
>> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
>> if (ret)
>> return 0;
>>
>> - delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
>> - &fb_ctrs_t1);
>> + delivered_perf = cppc_perf_from_fbctrs(cpu_data,
>> + &fb_ctrs.fb_ctrs_t0,
>> + &fb_ctrs.fb_ctrs_t1);
>>
>> return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
>> }
>> --
>> 2.25.1
>>

2023-10-26 01:56:06

by Zeng Heng

Subject: Re: [PATCH 3/3] cpufreq: CPPC: Eliminate the impact of cpc_read() latency error


On 2023/10/25 19:01, Mark Rutland wrote:
> On Wed, Oct 25, 2023 at 05:38:47PM +0800, Zeng Heng wrote:
>
> The previous patch added this function, and calls it with smp_call_on_cpu(),
> where it'll run in IRQ context with IRQs disabled...

smp_call_on_cpu() queues the work on the worker bound to that CPU.

And this function is called in task context, with IRQs certainly enabled.
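
For reference, a minimal sketch of that calling pattern (illustrative
only, not taken from the patch):

~~~
#include <linux/smp.h>

/*
 * smp_call_on_cpu() queues the work on a workqueue worker bound to the
 * target CPU and waits for it to complete, so the callback runs in task
 * context with interrupts enabled (phys == false) and may sleep --
 * unlike the smp_call_function*() family, which runs callbacks from
 * IPI (interrupt) context.
 */
static int sample_counters(void *data)
{
        /* runs on the requested CPU, in process context */
        return 0;
}

static int do_sample(unsigned int cpu)
{
        return smp_call_on_cpu(cpu, sample_counters, NULL, false);
}
~~~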


Zeng Heng

>> struct fb_ctr_pair *fb_ctrs = val;
>> int cpu = fb_ctrs->cpu;
>> int ret;
>> + unsigned long timeout;
>>
>> ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
>> if (ret)
>> return ret;
>>
>> - udelay(2); /* 2usec delay between sampling */
>> + if (likely(!irqs_disabled())) {
>> + /*
>> + * Set 1ms as sampling interval, but never schedule
>> + * to the idle task to prevent the AMU counters from
>> + * stopping working.
>> + */
>> + timeout = jiffies + msecs_to_jiffies(1);
>> + while (!time_after(jiffies, timeout))
>> + cond_resched();
>> +
>> + } else {
> ... so we'll enter this branch of the if-else ...
>
>> + pr_warn_once("CPU%d: Get rate in atomic context", cpu);
> ... and pr_warn_once() for something that's apparently normal and outside of
> the user's control?
>
> That doesn't make much sense to me.
>
> Mark.
>
>> + udelay(2); /* 2usec delay between sampling */
>> + }
>>
>> return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
>> }
>> --
>> 2.25.1
>>

2023-10-26 02:26:48

by Zeng Heng

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate


On 2023/10/25 19:13, Sudeep Holla wrote:
> On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
>> As ARM AMU's document says, all counters are subject to any changes
>> in clock frequency, including clock stopping caused by the WFI and WFE
>> instructions.
>>
>> Therefore, using smp_call_on_cpu() to trigger target CPU to
>> read self's AMU counters, which ensures the counters are working
>> properly while cstate feature is enabled.
>>
>> Reported-by: Sumit Gupta <[email protected]>
>> Link: https://lore.kernel.org/all/[email protected]/
>> Signed-off-by: Zeng Heng <[email protected]>
>> ---
>> drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
>> 1 file changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
>> index fe08ca419b3d..321a9dc9484d 100644
>> --- a/drivers/cpufreq/cppc_cpufreq.c
>> +++ b/drivers/cpufreq/cppc_cpufreq.c
> [...]
>
>> @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>>
>> cpufreq_cpu_put(policy);
>>
>> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>> - if (ret)
>> - return 0;
>> -
>> - udelay(2); /* 2usec delay between sampling */
>> + if (cpu_has_amu_feat(cpu))
> Have you compiled this on x86 ? Even if you have somehow managed to,
> this is not the right place to check the presence of AMU feature on
> the CPU.
> If AMU registers are used in CPPC, they must be using FFH GAS, in which
> case the interpretation of FFH is architecture dependent code.


According to drivers/cpufreq/Makefile, cppc_cpufreq.c is only compiled for
the ARM architecture.

But here, I would replace cpu_has_amu_feat() with cpc_ffh_supported(),
which belongs to the FFH APIs.

Thanks for the suggestion.


Thanks again,

Zeng Heng



> --
> Regards,
> Sudeep
>

2023-10-26 03:22:27

by Zeng Heng

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate


On 2023/10/25 18:54, Mark Rutland wrote:
> [adding Ionela]
>
> On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
>> As ARM AMU's document says, all counters are subject to any changes
>> in clock frequency, including clock stopping caused by the WFI and WFE
>> instructions.
>>
>> Therefore, using smp_call_on_cpu() to trigger target CPU to
>> read self's AMU counters, which ensures the counters are working
>> properly while cstate feature is enabled.
> IIUC there's a pretty deliberate split with all the actual reading of the AMU
> living in arch/arm64/kernel/topology.c, and the driver code being (relatively)
> generic.
>
> We already have code in arch/arm64/kernel/topology.c to read counters on a
> specific CPU; why can't we reuse that (and avoid exporting cpu_has_amu_feat())?
>
> Mark.

In this scenario, neither topology.c nor cppc_acpi.c provides an API to
keep the AMU online during the whole sampling period. Just using
cpc_read_ffh() at the start and end of the sampling period is not enough.

However, I can propose using cpc_ffh_supported() to replace
cpu_has_amu_feat() in v2, if you think this patch set is still valuable.


Thanks,

Zeng Heng

>> Reported-by: Sumit Gupta <[email protected]>
>> Link: https://lore.kernel.org/all/[email protected]/
>> Signed-off-by: Zeng Heng <[email protected]>
>> ---
>> drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
>> 1 file changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
>> index fe08ca419b3d..321a9dc9484d 100644
>> --- a/drivers/cpufreq/cppc_cpufreq.c
>> +++ b/drivers/cpufreq/cppc_cpufreq.c
>> @@ -90,6 +90,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
>> struct cppc_perf_fb_ctrs *fb_ctrs_t0,
>> struct cppc_perf_fb_ctrs *fb_ctrs_t1);
>>
>> +struct fb_ctr_pair {
>> + u32 cpu;
>> + struct cppc_perf_fb_ctrs fb_ctrs_t0;
>> + struct cppc_perf_fb_ctrs fb_ctrs_t1;
>> +};
>> +
>> /**
>> * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
>> * @work: The work item.
>> @@ -840,9 +846,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
>> return (reference_perf * delta_delivered) / delta_reference;
>> }
>>
>> +static int cppc_get_perf_ctrs_pair(void *val)
>> +{
>> + struct fb_ctr_pair *fb_ctrs = val;
>> + int cpu = fb_ctrs->cpu;
>> + int ret;
>> +
>> + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
>> + if (ret)
>> + return ret;
>> +
>> + udelay(2); /* 2usec delay between sampling */
>> +
>> + return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
>> +}
>> +
>> static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>> {
>> - struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
>> + struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
>> struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>> struct cppc_cpudata *cpu_data = policy->driver_data;
>> u64 delivered_perf;
>> @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>>
>> cpufreq_cpu_put(policy);
>>
>> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>> - if (ret)
>> - return 0;
>> -
>> - udelay(2); /* 2usec delay between sampling */
>> + if (cpu_has_amu_feat(cpu))
>> + ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
>> + &fb_ctrs, false);
>> + else
>> + ret = cppc_get_perf_ctrs_pair(&fb_ctrs);
>>
>> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
>> if (ret)
>> return 0;
>>
>> - delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
>> - &fb_ctrs_t1);
>> + delivered_perf = cppc_perf_from_fbctrs(cpu_data,
>> + &fb_ctrs.fb_ctrs_t0,
>> + &fb_ctrs.fb_ctrs_t1);
>>
>> return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
>> }
>> --
>> 2.25.1
>>

2023-10-26 08:54:27

by Sudeep Holla

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate

On Thu, Oct 26, 2023 at 10:24:54AM +0800, Zeng Heng wrote:
>
> On 2023/10/25 19:13, Sudeep Holla wrote:
> > On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
> > > As ARM AMU's document says, all counters are subject to any changes
> > > in clock frequency, including clock stopping caused by the WFI and WFE
> > > instructions.
> > >
> > > Therefore, using smp_call_on_cpu() to trigger target CPU to
> > > read self's AMU counters, which ensures the counters are working
> > > properly while cstate feature is enabled.
> > >
> > > Reported-by: Sumit Gupta <[email protected]>
> > > Link: https://lore.kernel.org/all/[email protected]/
> > > Signed-off-by: Zeng Heng <[email protected]>
> > > ---
> > > drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
> > > 1 file changed, 30 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> > > index fe08ca419b3d..321a9dc9484d 100644
> > > --- a/drivers/cpufreq/cppc_cpufreq.c
> > > +++ b/drivers/cpufreq/cppc_cpufreq.c
> > [...]
> >
> > > @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> > > cpufreq_cpu_put(policy);
> > > - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
> > > - if (ret)
> > > - return 0;
> > > -
> > > - udelay(2); /* 2usec delay between sampling */
> > > + if (cpu_has_amu_feat(cpu))
> > Have you compiled this on x86 ? Even if you have somehow managed to,
> > this is not the right place to check the presence of AMU feature on
> > the CPU.
> > If AMU registers are used in CPPC, they must be using FFH GAS, in which
> > case the interpretation of FFH is architecture dependent code.
>
> According to drivers/cpufreq/Makefile, cppc_cpufreq.c is only compiled with
> ARM architecture.
>

Well, that's true, but this change doesn't belong in cppc_cpufreq.c; it
must be part of drivers/acpi/cppc_acpi.c IMO. Sorry, I assumed that
without explicitly mentioning it here.

> But here, I would change cpu_has_amu_feat() with cpc_ffh_supported(), which
> belongs to FFH APIs.
>

It is not like that. cppc_acpi.c will know the GAS is FFH-based, so there
is no need to check anything there. I see counters_read_on_cpu(), called
from cpc_ffh_read(), already takes care of reading the AMUs on the right
CPU. What exactly is the issue you are seeing? I don't know if this change
is needed at all.

--
Regards,
Sudeep

2023-10-26 09:06:29

by Zeng Heng

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate


On 2023/10/26 16:53, Sudeep Holla wrote:
> On Thu, Oct 26, 2023 at 10:24:54AM +0800, Zeng Heng wrote:
>> On 2023/10/25 19:13, Sudeep Holla wrote:
>>> On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
>>>> As ARM AMU's document says, all counters are subject to any changes
>>>> in clock frequency, including clock stopping caused by the WFI and WFE
>>>> instructions.
>>>>
>>>> Therefore, using smp_call_on_cpu() to trigger target CPU to
>>>> read self's AMU counters, which ensures the counters are working
>>>> properly while cstate feature is enabled.
>>>>
>>>> Reported-by: Sumit Gupta <[email protected]>
>>>> Link: https://lore.kernel.org/all/[email protected]/
>>>> Signed-off-by: Zeng Heng <[email protected]>
>>>> ---
>>>> drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
>>>> 1 file changed, 30 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
>>>> index fe08ca419b3d..321a9dc9484d 100644
>>>> --- a/drivers/cpufreq/cppc_cpufreq.c
>>>> +++ b/drivers/cpufreq/cppc_cpufreq.c
>>> [...]
>>>
>>>> @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
>>>> cpufreq_cpu_put(policy);
>>>> - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
>>>> - if (ret)
>>>> - return 0;
>>>> -
>>>> - udelay(2); /* 2usec delay between sampling */
>>>> + if (cpu_has_amu_feat(cpu))
>>> Have you compiled this on x86 ? Even if you have somehow managed to,
>>> this is not the right place to check the presence of AMU feature on
>>> the CPU.
>>> If AMU registers are used in CPPC, they must be using FFH GAS, in which
>>> case the interpretation of FFH is architecture dependent code.
>> According to drivers/cpufreq/Makefile, cppc_cpufreq.c is only compiled with
>> ARM architecture.
>>
> Well that's true but this change doesn't belong to cppc_cpufreq.c, it must
> be part of drivers/acpi/cppc_acpi.c IMO and sorry I assumed that without
> explicitly mentioning that here.
>
>> But here, I would change cpu_has_amu_feat() with cpc_ffh_supported(), which
>> belongs to FFH APIs.
>>
> It is not like that. cppc_acpi.c will know the GAS is FFH based so no need to
> check anything there. I see counters_read_on_cpu() called from cpc_ffh_read()
> already takes care of reading the AMUs on the right CPU. What exactly is
> the issue you are seeing ? I don't if this change is needed at all.
>
> --
> Regards,
> Sudeep


In this scenario, neither topology.c nor cppc_acpi.c provides an API to
keep the AMU online during the whole sampling period. Just using
cpc_read_ffh() at the start and end of the sampling period is not enough.


Zeng Heng

2023-10-26 11:26:54

by Mark Rutland

Subject: Re: [PATCH 3/3] cpufreq: CPPC: Eliminate the impact of cpc_read() latency error

On Thu, Oct 26, 2023 at 09:55:39AM +0800, Zeng Heng wrote:
>
> On 2023/10/25 19:01, Mark Rutland wrote:
> > On Wed, Oct 25, 2023 at 05:38:47PM +0800, Zeng Heng wrote:
> >
> > The previous patch added this function, and calls it with smp_call_on_cpu(),
> > where it'll run in IRQ context with IRQs disabled...
>
> smp_call_on_cpu() queues the work on the worker bound to that CPU.

Ah, sorry -- I had confused this with the smp_call_function*() family, which do
this in IRQ context.

> And this function is called in task context, with IRQs certainly enabled.

Understood; given that, please ignore my comments below.

Mark.

>
>
> Zeng Heng
>
> > > struct fb_ctr_pair *fb_ctrs = val;
> > > int cpu = fb_ctrs->cpu;
> > > int ret;
> > > + unsigned long timeout;
> > > ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
> > > if (ret)
> > > return ret;
> > > - udelay(2); /* 2usec delay between sampling */
> > > + if (likely(!irqs_disabled())) {
> > > + /*
> > > + * Set 1ms as sampling interval, but never schedule
> > > + * to the idle task to prevent the AMU counters from
> > > + * stopping working.
> > > + */
> > > + timeout = jiffies + msecs_to_jiffies(1);
> > > + while (!time_after(jiffies, timeout))
> > > + cond_resched();
> > > +
> > > + } else {
> > ... so we'll enter this branch of the if-else ...
> >
> > > + pr_warn_once("CPU%d: Get rate in atomic context", cpu);
> > ... and pr_warn_once() for something that's apparently normal and outside of
> > the user's control?
> >
> > That doesn't make much sense to me.
> >
> > Mark.
> >
> > > + udelay(2); /* 2usec delay between sampling */
> > > + }
> > > return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
> > > }
> > > --
> > > 2.25.1
> > >

2023-10-30 13:20:15

by Beata Michalska

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate

On Wed, Oct 25, 2023 at 08:27:23PM +0530, Sumit Gupta wrote:
>
>
>
> > [adding Ionela]
> >
> > On Wed, Oct 25, 2023 at 05:38:46PM +0800, Zeng Heng wrote:
> > > As ARM AMU's document says, all counters are subject to any changes
> > > in clock frequency, including clock stopping caused by the WFI and WFE
> > > instructions.
> > >
> > > Therefore, using smp_call_on_cpu() to trigger target CPU to
> > > read self's AMU counters, which ensures the counters are working
> > > properly while cstate feature is enabled.
> >
> > IIUC there's a pretty deliberate split with all the actual reading of the AMU
> > living in arch/arm64/kernel/topology.c, and the driver code being (relatively)
> > generic.
> >
> > We already have code in arch/arm64/kernel/topology.c to read counters on a
> > specific CPU; why can't we reuse that (and avoid exporting cpu_has_amu_feat())?
>
>
> This patch seems mostly based on my previous patch [1] and discussed here
> [2] already. Beata [CCed] shared an alternate approach [3] leveraging
> existing code from 'topology.c' to get the average freq for last tick
> period.
>
>
> Beata,
>
> Could you share v2 of [3] with the request to merge. We can try to solve the
> issue with CPU IDLE case later on top?
>
Will do (same for the below request if feasible)

---
BR
B.
> Additionally, also please include the fix in [4] if it looks fine.
>
> Best Regards,
> Sumit Gupta
>
> [1] https://lore.kernel.org/all/[email protected]/
> [2] https://lore.kernel.org/lkml/[email protected]/T/#m2174305de4706006e0bd9c103a0e5ff61cea7a12
> [3]
> https://lore.kernel.org/lkml/[email protected]/
> [4]
> https://lore.kernel.org/lkml/[email protected]/
>
>
> > >
> > > Reported-by: Sumit Gupta <[email protected]>
> > > Link: https://lore.kernel.org/all/[email protected]/
> > > Signed-off-by: Zeng Heng <[email protected]>
> > > ---
> > > drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++--------
> > > 1 file changed, 30 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> > > index fe08ca419b3d..321a9dc9484d 100644
> > > --- a/drivers/cpufreq/cppc_cpufreq.c
> > > +++ b/drivers/cpufreq/cppc_cpufreq.c
> > > @@ -90,6 +90,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> > > struct cppc_perf_fb_ctrs *fb_ctrs_t0,
> > > struct cppc_perf_fb_ctrs *fb_ctrs_t1);
> > >
> > > +struct fb_ctr_pair {
> > > + u32 cpu;
> > > + struct cppc_perf_fb_ctrs fb_ctrs_t0;
> > > + struct cppc_perf_fb_ctrs fb_ctrs_t1;
> > > +};
> > > +
> > > /**
> > > * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
> > > * @work: The work item.
> > > @@ -840,9 +846,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
> > > return (reference_perf * delta_delivered) / delta_reference;
> > > }
> > >
> > > +static int cppc_get_perf_ctrs_pair(void *val)
> > > +{
> > > + struct fb_ctr_pair *fb_ctrs = val;
> > > + int cpu = fb_ctrs->cpu;
> > > + int ret;
> > > +
> > > + ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + udelay(2); /* 2usec delay between sampling */
> > > +
> > > + return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
> > > +}
> > > +
> > > static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> > > {
> > > - struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
> > > + struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
> > > struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> > > struct cppc_cpudata *cpu_data = policy->driver_data;
> > > u64 delivered_perf;
> > > @@ -850,18 +871,18 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
> > >
> > > cpufreq_cpu_put(policy);
> > >
> > > - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
> > > - if (ret)
> > > - return 0;
> > > -
> > > - udelay(2); /* 2usec delay between sampling */
> > > + if (cpu_has_amu_feat(cpu))
> > > + ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
> > > + &fb_ctrs, false);
> > > + else
> > > + ret = cppc_get_perf_ctrs_pair(&fb_ctrs);
> > >
> > > - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
> > > if (ret)
> > > return 0;
> > >
> > > - delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
> > > - &fb_ctrs_t1);
> > > + delivered_perf = cppc_perf_from_fbctrs(cpu_data,
> > > + &fb_ctrs.fb_ctrs_t0,
> > > + &fb_ctrs.fb_ctrs_t1);
> > >
> > > return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
> > > }
> > > --
> > > 2.25.1
> > >

2023-10-31 23:53:15

by kernel test robot

Subject: Re: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate

Hi Zeng,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/acpi-bus arm64/for-next/core linus/master v6.6 next-20231031]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Zeng-Heng/arm64-cpufeature-Export-cpu_has_amu_feat/20231025-173559
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/20231025093847.3740104-3-zengheng4%40huawei.com
patch subject: [PATCH 2/3] cpufreq: CPPC: Keep the target core awake when reading its cpufreq rate
config: arm64-randconfig-003-20231101 (https://download.01.org/0day-ci/archive/20231101/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231101/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All error/warnings (new ones prefixed by >>):

drivers/cpufreq/cppc_cpufreq.c: In function 'cppc_get_perf_ctrs_pair':
>> drivers/cpufreq/cppc_cpufreq.c:852:26: error: invalid use of undefined type 'struct fb_ctr_pair'
852 | int cpu = fb_ctrs->cpu;
| ^~
drivers/cpufreq/cppc_cpufreq.c:855:47: error: invalid use of undefined type 'struct fb_ctr_pair'
855 | ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
| ^~
drivers/cpufreq/cppc_cpufreq.c:861:48: error: invalid use of undefined type 'struct fb_ctr_pair'
861 | return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
| ^~
drivers/cpufreq/cppc_cpufreq.c: In function 'cppc_cpufreq_get_rate':
>> drivers/cpufreq/cppc_cpufreq.c:866:16: error: variable 'fb_ctrs' has initializer but incomplete type
866 | struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
| ^~~~~~~~~~~
>> drivers/cpufreq/cppc_cpufreq.c:866:41: error: 'struct fb_ctr_pair' has no member named 'cpu'
866 | struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
| ^~~
>> drivers/cpufreq/cppc_cpufreq.c:866:47: warning: excess elements in struct initializer
866 | struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
| ^~~
drivers/cpufreq/cppc_cpufreq.c:866:47: note: (near initialization for 'fb_ctrs')
>> drivers/cpufreq/cppc_cpufreq.c:866:28: error: storage size of 'fb_ctrs' isn't known
866 | struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
| ^~~~~~~
>> drivers/cpufreq/cppc_cpufreq.c:866:28: warning: unused variable 'fb_ctrs' [-Wunused-variable]


vim +852 drivers/cpufreq/cppc_cpufreq.c

848
849 static int cppc_get_perf_ctrs_pair(void *val)
850 {
851 struct fb_ctr_pair *fb_ctrs = val;
> 852 int cpu = fb_ctrs->cpu;
853 int ret;
854
855 ret = cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t0);
856 if (ret)
857 return ret;
858
859 udelay(2); /* 2usec delay between sampling */
860
861 return cppc_get_perf_ctrs(cpu, &fb_ctrs->fb_ctrs_t1);
862 }
863
864 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
865 {
> 866 struct fb_ctr_pair fb_ctrs = { .cpu = cpu, };
867 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
868 struct cppc_cpudata *cpu_data = policy->driver_data;
869 u64 delivered_perf;
870 int ret;
871
872 cpufreq_cpu_put(policy);
873
874 if (cpu_has_amu_feat(cpu))
875 ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_pair,
876 &fb_ctrs, false);
877 else
878 ret = cppc_get_perf_ctrs_pair(&fb_ctrs);
879
880 if (ret)
881 return 0;
882
883 delivered_perf = cppc_perf_from_fbctrs(cpu_data,
884 &fb_ctrs.fb_ctrs_t0,
885 &fb_ctrs.fb_ctrs_t1);
886
887 return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
888 }
889

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki