2016-11-07 07:39:26

by Akshay Adiga

[permalink] [raw]
Subject: [PATCH v2 1/2] cpufreq: powernv: Adding fast_switch for schedutil

Adding fast_switch which does light weight operation to set the desired
pstate. Both global and local pstates are set to the same desired pstate.

Signed-off-by: Akshay Adiga <[email protected]>
---
Changes from v1 :
- Removed unnecessary check for index out of bound.

drivers/cpufreq/powernv-cpufreq.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index d3ffde8..4a4380d 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -752,9 +752,12 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
spin_lock_init(&gpstates->gpstate_lock);
ret = cpufreq_table_validate_and_show(policy, powernv_freqs);

- if (ret < 0)
+ if (ret < 0) {
kfree(policy->driver_data);
+ return ret;
+ }

+ policy->fast_switch_possible = true;
return ret;
}

@@ -897,6 +900,20 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
del_timer_sync(&gpstates->timer);
}

+static unsigned int powernv_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ int index;
+ struct powernv_smp_call_data freq_data;
+
+ index = cpufreq_table_find_index_dl(policy, target_freq);
+ freq_data.pstate_id = powernv_freqs[index].driver_data;
+ freq_data.gpstate_id = powernv_freqs[index].driver_data;
+ set_pstate(&freq_data);
+
+ return powernv_freqs[index].frequency;
+}
+
static struct cpufreq_driver powernv_cpufreq_driver = {
.name = "powernv-cpufreq",
.flags = CPUFREQ_CONST_LOOPS,
@@ -904,6 +921,7 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
.exit = powernv_cpufreq_cpu_exit,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = powernv_cpufreq_target_index,
+ .fast_switch = powernv_fast_switch,
.get = powernv_cpufreq_get,
.stop_cpu = powernv_cpufreq_stop_cpu,
.attr = powernv_cpu_freq_attr,
--
2.5.5


2016-11-07 07:39:29

by Akshay Adiga

[permalink] [raw]
Subject: [PATCH v2 2/2] cpufreq: powernv: Use PMCR to verify global and local pstate

As fast_switch() may get called with interrupt disable mode, we cannot
hold a mutex to update the global_pstate_info. So currently, fast_switch()
does not update the global_pstate_info and it will end up with stale data
whenever pstate is updated through fast_switch().

As the gpstate_timer can fire after fast_switch() has updated the pstates,
the timer handler cannot rely on the cached values of local and global
pstate and needs to read it from the PMCR.

Only gpstate_timer_handler() is affected by the stale cached pstate data
beacause either fast_switch() or target_index() routines will be called
for a given govenor, but gpstate_timer can fire after the governor has
changed to schedutil.


Signed-off-by: Akshay Adiga <[email protected]>
---

Changes from v1 :
- Corrected Commit message
- Type cast pstate values read from PMCR to type s8
- Added Macros to get local and global pstates from PMCR


drivers/cpufreq/powernv-cpufreq.c | 34 ++++++++++++++++++++++++----------
1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 4a4380d..bf4bc585 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -42,6 +42,8 @@
#define PMSR_PSAFE_ENABLE (1UL << 30)
#define PMSR_SPR_EM_DISABLE (1UL << 31)
#define PMSR_MAX(x) ((x >> 32) & 0xFF)
+#define PMCR_LPSTATE(x) (((x) >> 48) & 0xFF)
+#define PMCR_GPSTATE(x) (((x) >> 56) & 0xFF)

#define MAX_RAMP_DOWN_TIME 5120
/*
@@ -592,7 +594,8 @@ void gpstate_timer_handler(unsigned long data)
{
struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
struct global_pstate_info *gpstates = policy->driver_data;
- int gpstate_idx;
+ int gpstate_idx, lpstate_idx;
+ unsigned long val;
unsigned int time_diff = jiffies_to_msecs(jiffies)
- gpstates->last_sampled_time;
struct powernv_smp_call_data freq_data;
@@ -600,21 +603,36 @@ void gpstate_timer_handler(unsigned long data)
if (!spin_trylock(&gpstates->gpstate_lock))
return;

+ /*
+ * If PMCR was last updated was using fast_swtich then
+ * We may have wrong in gpstate->last_lpstate_idx
+ * value. Hence, read from PMCR to get correct data.
+ */
+ val = get_pmspr(SPRN_PMCR);
+ freq_data.gpstate_id = (s8)PMCR_GPSTATE(val);
+ freq_data.pstate_id = (s8)PMCR_LPSTATE(val);
+ if (freq_data.gpstate_id == freq_data.pstate_id) {
+ reset_gpstates(policy);
+ spin_unlock(&gpstates->gpstate_lock);
+ return;
+ }
+
gpstates->last_sampled_time += time_diff;
gpstates->elapsed_time += time_diff;
- freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx);

- if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) ||
- (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
+ if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
gpstate_idx = pstate_to_idx(freq_data.pstate_id);
reset_gpstates(policy);
gpstates->highest_lpstate_idx = gpstate_idx;
} else {
+ lpstate_idx = pstate_to_idx(freq_data.pstate_id);
gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
gpstates->highest_lpstate_idx,
- gpstates->last_lpstate_idx);
+ lpstate_idx);
}
-
+ freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
+ gpstates->last_gpstate_idx = gpstate_idx;
+ gpstates->last_lpstate_idx = lpstate_idx;
/*
* If local pstate is equal to global pstate, rampdown is over
* So timer is not required to be queued.
@@ -622,10 +640,6 @@ void gpstate_timer_handler(unsigned long data)
if (gpstate_idx != gpstates->last_lpstate_idx)
queue_gpstate_timer(gpstates);

- freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
- gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id);
- gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id);
-
spin_unlock(&gpstates->gpstate_lock);

/* Timer may get migrated to a different cpu on cpu hot unplug */
--
2.5.5

2016-11-07 09:12:46

by Viresh Kumar

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] cpufreq: powernv: Adding fast_switch for schedutil

On 07-11-16, 13:09, Akshay Adiga wrote:
> Adding fast_switch which does light weight operation to set the desired
> pstate. Both global and local pstates are set to the same desired pstate.
>
> Signed-off-by: Akshay Adiga <[email protected]>
> ---
> Changes from v1 :
> - Removed unnecessary check for index out of bound.

Provided Gautham/Shilpa find these patches fine..

Acked-by: Viresh Kumar <[email protected]>

--
viresh

2016-11-08 03:40:58

by Gautham R Shenoy

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] cpufreq: powernv: Use PMCR to verify global and local pstate


On Mon, Nov 07, 2016 at 01:09:09PM +0530, Akshay Adiga wrote:
> As fast_switch() may get called with interrupt disable mode, we cannot
> hold a mutex to update the global_pstate_info. So currently, fast_switch()
> does not update the global_pstate_info and it will end up with stale data
> whenever pstate is updated through fast_switch().
>
> As the gpstate_timer can fire after fast_switch() has updated the pstates,
> the timer handler cannot rely on the cached values of local and global
> pstate and needs to read it from the PMCR.
>
> Only gpstate_timer_handler() is affected by the stale cached pstate data
> beacause either fast_switch() or target_index() routines will be called
> for a given govenor, but gpstate_timer can fire after the governor has
> changed to schedutil.
>
>
> Signed-off-by: Akshay Adiga <[email protected]>
> ---
>
> Changes from v1 :
> - Corrected Commit message
> - Type cast pstate values read from PMCR to type s8
> - Added Macros to get local and global pstates from PMCR

Thanks for this. Could you also send a (separate patch) to set the
local and global pstates to PMCR in set_pstate?

>
>
> drivers/cpufreq/powernv-cpufreq.c | 34 ++++++++++++++++++++++++----------
> 1 file changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index 4a4380d..bf4bc585 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -42,6 +42,8 @@
> #define PMSR_PSAFE_ENABLE (1UL << 30)
> #define PMSR_SPR_EM_DISABLE (1UL << 31)
> #define PMSR_MAX(x) ((x >> 32) & 0xFF)
> +#define PMCR_LPSTATE(x) (((x) >> 48) & 0xFF)
> +#define PMCR_GPSTATE(x) (((x) >> 56) & 0xFF)

You define:
#define LPSTATE_SHIFT 48
#define GPSTATE_SHIFT 56

since we can use this in the set_variants.

Moreover, the LPSTATE, GPSTATE retreival is applicable to both PMCR and PMSR. So
could you rename these functions to GET_LPSTATE, GET_GPSTATE.

Similarly, we might want to have a SET_LPSTATE, SET_GPSTATE and fix
the hard coded values that we have in set_pstate.


>
> #define MAX_RAMP_DOWN_TIME 5120
> /*
> @@ -592,7 +594,8 @@ void gpstate_timer_handler(unsigned long data)
> {
> struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
> struct global_pstate_info *gpstates = policy->driver_data;
> - int gpstate_idx;
> + int gpstate_idx, lpstate_idx;
> + unsigned long val;
> unsigned int time_diff = jiffies_to_msecs(jiffies)
> - gpstates->last_sampled_time;
> struct powernv_smp_call_data freq_data;
> @@ -600,21 +603,36 @@ void gpstate_timer_handler(unsigned long data)
> if (!spin_trylock(&gpstates->gpstate_lock))
> return;
>
> + /*
> + * If PMCR was last updated was using fast_swtich then
> + * We may have wrong in gpstate->last_lpstate_idx
> + * value. Hence, read from PMCR to get correct data.
> + */
> + val = get_pmspr(SPRN_PMCR);
> + freq_data.gpstate_id = (s8)PMCR_GPSTATE(val);
> + freq_data.pstate_id = (s8)PMCR_LPSTATE(val);
> + if (freq_data.gpstate_id == freq_data.pstate_id) {
> + reset_gpstates(policy);
> + spin_unlock(&gpstates->gpstate_lock);
> + return;
> + }
> +
> gpstates->last_sampled_time += time_diff;
> gpstates->elapsed_time += time_diff;
> - freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx);
>
> - if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) ||
> - (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
> + if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
> gpstate_idx = pstate_to_idx(freq_data.pstate_id);
> reset_gpstates(policy);
> gpstates->highest_lpstate_idx = gpstate_idx;
> } else {
> + lpstate_idx = pstate_to_idx(freq_data.pstate_id);
> gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
> gpstates->highest_lpstate_idx,
> - gpstates->last_lpstate_idx);
> + lpstate_idx);
> }
> -
> + freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
> + gpstates->last_gpstate_idx = gpstate_idx;
> + gpstates->last_lpstate_idx = lpstate_idx;
> /*
> * If local pstate is equal to global pstate, rampdown is over
> * So timer is not required to be queued.
> @@ -622,10 +640,6 @@ void gpstate_timer_handler(unsigned long data)
> if (gpstate_idx != gpstates->last_lpstate_idx)
> queue_gpstate_timer(gpstates);
>
> - freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
> - gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id);
> - gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id);
> -
> spin_unlock(&gpstates->gpstate_lock);
>
> /* Timer may get migrated to a different cpu on cpu hot unplug */
> --
> 2.5.5

Looks good otherwise.

Reviewed-by: Gautham R. Shenoy <[email protected]>
>

2016-11-08 13:59:34

by Akshay Adiga

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] cpufreq: powernv: Use PMCR to verify global and local pstate

Thanks gautham for the review.

Good point, I have made the macros more generic in the next version as
you have mentioned.

I will post a separate patch to set pstates using these macros. :)

On 11/08/2016 09:10 AM, Gautham R Shenoy wrote:
> On Mon, Nov 07, 2016 at 01:09:09PM +0530, Akshay Adiga wrote:
>> As fast_switch() may get called with interrupt disable mode, we cannot
>> hold a mutex to update the global_pstate_info. So currently, fast_switch()
>> does not update the global_pstate_info and it will end up with stale data
>> whenever pstate is updated through fast_switch().
>>
>> As the gpstate_timer can fire after fast_switch() has updated the pstates,
>> the timer handler cannot rely on the cached values of local and global
>> pstate and needs to read it from the PMCR.
>>
>> Only gpstate_timer_handler() is affected by the stale cached pstate data
>> beacause either fast_switch() or target_index() routines will be called
>> for a given govenor, but gpstate_timer can fire after the governor has
>> changed to schedutil.
>>
>>
>> Signed-off-by: Akshay Adiga <[email protected]>
>> ---
>>
>> Changes from v1 :
>> - Corrected Commit message
>> - Type cast pstate values read from PMCR to type s8
>> - Added Macros to get local and global pstates from PMCR
> Thanks for this. Could you also send a (separate patch) to set the
> local and global pstates to PMCR in set_pstate?
>
>>
>> drivers/cpufreq/powernv-cpufreq.c | 34 ++++++++++++++++++++++++----------
>> 1 file changed, 24 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
>> index 4a4380d..bf4bc585 100644
>> --- a/drivers/cpufreq/powernv-cpufreq.c
>> +++ b/drivers/cpufreq/powernv-cpufreq.c
>> @@ -42,6 +42,8 @@
>> #define PMSR_PSAFE_ENABLE (1UL << 30)
>> #define PMSR_SPR_EM_DISABLE (1UL << 31)
>> #define PMSR_MAX(x) ((x >> 32) & 0xFF)
>> +#define PMCR_LPSTATE(x) (((x) >> 48) & 0xFF)
>> +#define PMCR_GPSTATE(x) (((x) >> 56) & 0xFF)
> You define:
> #define LPSTATE_SHIFT 48
> #define GPSTATE_SHIFT 56
>
> since we can use this in the set_variants.
>
> Moreover, the LPSTATE, GPSTATE retreival is applicable to both PMCR and PMSR. So
> could you rename these functions to GET_LPSTATE, GET_GPSTATE.
>
> Similarly, we might want to have a SET_LPSTATE, SET_GPSTATE and fix
> the hard coded values that we have in set_pstate.
>
>
>> #define MAX_RAMP_DOWN_TIME 5120
>> /*
>> @@ -592,7 +594,8 @@ void gpstate_timer_handler(unsigned long data)
>> {
>> struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
>> struct global_pstate_info *gpstates = policy->driver_data;
>> - int gpstate_idx;
>> + int gpstate_idx, lpstate_idx;
>> + unsigned long val;
>> unsigned int time_diff = jiffies_to_msecs(jiffies)
>> - gpstates->last_sampled_time;
>> struct powernv_smp_call_data freq_data;
>> @@ -600,21 +603,36 @@ void gpstate_timer_handler(unsigned long data)
>> if (!spin_trylock(&gpstates->gpstate_lock))
>> return;
>>
>> + /*
>> + * If PMCR was last updated was using fast_swtich then
>> + * We may have wrong in gpstate->last_lpstate_idx
>> + * value. Hence, read from PMCR to get correct data.
>> + */
>> + val = get_pmspr(SPRN_PMCR);
>> + freq_data.gpstate_id = (s8)PMCR_GPSTATE(val);
>> + freq_data.pstate_id = (s8)PMCR_LPSTATE(val);
>> + if (freq_data.gpstate_id == freq_data.pstate_id) {
>> + reset_gpstates(policy);
>> + spin_unlock(&gpstates->gpstate_lock);
>> + return;
>> + }
>> +
>> gpstates->last_sampled_time += time_diff;
>> gpstates->elapsed_time += time_diff;
>> - freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx);
>>
>> - if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) ||
>> - (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
>> + if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
>> gpstate_idx = pstate_to_idx(freq_data.pstate_id);
>> reset_gpstates(policy);
>> gpstates->highest_lpstate_idx = gpstate_idx;
>> } else {
>> + lpstate_idx = pstate_to_idx(freq_data.pstate_id);
>> gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
>> gpstates->highest_lpstate_idx,
>> - gpstates->last_lpstate_idx);
>> + lpstate_idx);
>> }
>> -
>> + freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
>> + gpstates->last_gpstate_idx = gpstate_idx;
>> + gpstates->last_lpstate_idx = lpstate_idx;
>> /*
>> * If local pstate is equal to global pstate, rampdown is over
>> * So timer is not required to be queued.
>> @@ -622,10 +640,6 @@ void gpstate_timer_handler(unsigned long data)
>> if (gpstate_idx != gpstates->last_lpstate_idx)
>> queue_gpstate_timer(gpstates);
>>
>> - freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
>> - gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id);
>> - gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id);
>> -
>> spin_unlock(&gpstates->gpstate_lock);
>>
>> /* Timer may get migrated to a different cpu on cpu hot unplug */
>> --
>> 2.5.5
> Looks good otherwise.
>
> Reviewed-by: Gautham R. Shenoy <[email protected]>