The play_idle function has two users, the intel powerclamp and the
idle_injection.
The idle injection cooling device uses the function via the
idle_injection powercap's APIs. Unfortunately, play_idle is currently
limited by the idle state depth: by default the deepest idle state is
selected. On the ARM[64] platforms, most of the time it is the cluster
idle state, whose exit latency and residency can be very high. That
reduces the scope of the idle injection usage because the impact on
performance can be very significant.
If the idle injection cycles can be done with a shallow state like a
retention state, the cooling effect would eventually give similar
results to the cpufreq cooling device.
In order to prepare the function to receive an idle state parameter,
let's replace the 'use_deepest_state' boolean field with 'use_state_idx'
and use this value to enter the specific idle state.
The current code keeps the default behavior, which is to go to the
deepest idle state.
Signed-off-by: Daniel Lezcano <[email protected]>
Acked-by: Mathieu Poirier <[email protected]>
Reviewed-by: Ulf Hansson <[email protected]>
---
V6:
- Rename the use_state variable to use_state_idx:
https://lkml.org/lkml/2019/10/28/874
V5:
- Fix s2idle default idle state value:
https://lkml.org/lkml/2019/10/15/522
---
drivers/cpuidle/cpuidle.c | 21 +++++++++++----------
include/linux/cpuidle.h | 13 ++++++-------
kernel/sched/idle.c | 11 +++++++----
3 files changed, 24 insertions(+), 21 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0895b988fa92..18523ea6b11b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -99,31 +99,31 @@ static int find_deepest_state(struct cpuidle_driver *drv,
}
/**
- * cpuidle_use_deepest_state - Set/clear governor override flag.
- * @enable: New value of the flag.
+ * cpuidle_use_state - Force the cpuidle framework to enter an idle state.
+ * @index: Idle state index to force, or CPUIDLE_STATE_NOUSE to clear it
*
- * Set/unset the current CPU to use the deepest idle state (override governors
- * going forward if set).
+ * Specify an idle state the cpuidle framework must step in and bypass
+ * the idle state selection process.
*/
-void cpuidle_use_deepest_state(bool enable)
+void cpuidle_use_state(int index)
{
struct cpuidle_device *dev;
preempt_disable();
dev = cpuidle_get_device();
if (dev)
- dev->use_deepest_state = enable;
+ dev->use_state_idx = index;
preempt_enable();
}
/**
* cpuidle_find_deepest_state - Find the deepest available idle state.
- * @drv: cpuidle driver for the given CPU.
- * @dev: cpuidle device for the given CPU.
*/
-int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+int cpuidle_find_deepest_state(void)
{
+ struct cpuidle_device *dev = cpuidle_get_device();
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
return find_deepest_state(drv, dev, UINT_MAX, 0, false);
}
@@ -554,6 +554,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
static void __cpuidle_device_init(struct cpuidle_device *dev)
{
memset(dev->states_usage, 0, sizeof(dev->states_usage));
+ dev->use_state_idx = CPUIDLE_STATE_NOUSE;
dev->last_residency = 0;
dev->next_hrtimer = 0;
}
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4b6b5bea8f79..d53ad36cb2de 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/hrtimer.h>
+#define CPUIDLE_STATE_NOUSE -1
#define CPUIDLE_STATE_MAX 10
#define CPUIDLE_NAME_LEN 16
#define CPUIDLE_DESC_LEN 32
@@ -80,11 +81,11 @@ struct cpuidle_driver_kobj;
struct cpuidle_device {
unsigned int registered:1;
unsigned int enabled:1;
- unsigned int use_deepest_state:1;
unsigned int poll_time_limit:1;
unsigned int cpu;
ktime_t next_hrtimer;
+ int use_state_idx;
int last_state_idx;
int last_residency;
u64 poll_limit_ns;
@@ -203,19 +204,17 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
#endif
#ifdef CONFIG_CPU_IDLE
-extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+extern int cpuidle_find_deepest_state(void);
extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
-extern void cpuidle_use_deepest_state(bool enable);
+extern void cpuidle_use_state(int index);
#else
-static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+static inline int cpuidle_find_deepest_state(void)
{return -ENODEV; }
static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{return -ENODEV; }
-static inline void cpuidle_use_deepest_state(bool enable)
+static inline void cpuidle_use_state(int index)
{
}
#endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8dad5aa600ea..fb9fc93f1497 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -165,7 +165,8 @@ static void cpuidle_idle_call(void)
* until a proper wakeup interrupt happens.
*/
- if (idle_should_enter_s2idle() || dev->use_deepest_state) {
+ if (idle_should_enter_s2idle() ||
+ dev->use_state_idx != CPUIDLE_STATE_NOUSE) {
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
@@ -176,12 +177,14 @@ static void cpuidle_idle_call(void)
}
rcu_idle_exit();
+ next_state = cpuidle_find_deepest_state();
+ } else {
+ next_state = dev->use_state_idx;
}
tick_nohz_idle_stop_tick();
rcu_idle_enter();
- next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;
@@ -328,7 +331,7 @@ void play_idle(unsigned long duration_us)
rcu_sleep_check();
preempt_disable();
current->flags |= PF_IDLE;
- cpuidle_use_deepest_state(true);
+ cpuidle_use_state(cpuidle_find_deepest_state());
it.done = 0;
hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -339,7 +342,7 @@ void play_idle(unsigned long duration_us)
while (!READ_ONCE(it.done))
do_idle();
- cpuidle_use_deepest_state(false);
+ cpuidle_use_state(CPUIDLE_STATE_NOUSE);
current->flags &= ~PF_IDLE;
preempt_fold_need_resched();
--
2.17.1
Currently the idle injection framework only allows injecting the
deepest idle state available on the system.
Give the opportunity to specify which idle state we want to inject by
adding a new helper function to set the state and use it when calling
play_idle().
Signed-off-by: Daniel Lezcano <[email protected]>
Acked-by: Mathieu Poirier <[email protected]>
Reviewed-by: Ulf Hansson <[email protected]>
---
V6:
- Rename variable name 'state' -> 'state_idx':
https://lkml.org/lkml/2019/10/28/874
---
drivers/powercap/idle_inject.c | 14 +++++++++++++-
include/linux/idle_inject.h | 3 +++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index 233c878cbf46..2607d3e9afc5 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -66,6 +66,7 @@ struct idle_inject_thread {
*/
struct idle_inject_device {
struct hrtimer timer;
+ int state_idx;
unsigned int idle_duration_us;
unsigned int run_duration_us;
unsigned long int cpumask[0];
@@ -140,7 +141,7 @@ static void idle_inject_fn(unsigned int cpu)
iit->should_run = 0;
play_idle(READ_ONCE(ii_dev->idle_duration_us),
- cpuidle_find_deepest_state());
+ READ_ONCE(ii_dev->state_idx));
}
/**
@@ -171,6 +172,16 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev,
*idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
}
+/**
+ * idle_inject_set_state - set the idle state to inject
+ * @index: an integer for the idle state to inject
+ */
+void idle_inject_set_state(struct idle_inject_device *ii_dev, int index)
+{
+ if (index >= CPUIDLE_STATE_NOUSE && index < CPUIDLE_STATE_MAX)
+ WRITE_ONCE(ii_dev->state_idx, index);
+}
+
/**
* idle_inject_start - start idle injections
* @ii_dev: idle injection control device structure
@@ -299,6 +310,7 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
ii_dev->timer.function = idle_inject_timer_fn;
+ ii_dev->state_idx = 0;
for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
index a445cd1a36c5..2efc60252d7b 100644
--- a/include/linux/idle_inject.h
+++ b/include/linux/idle_inject.h
@@ -26,4 +26,7 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev,
void idle_inject_get_duration(struct idle_inject_device *ii_dev,
unsigned int *run_duration_us,
unsigned int *idle_duration_us);
+
+void idle_inject_set_state(struct idle_inject_device *ii_dev, int index);
+
#endif /* __IDLE_INJECT_H__ */
--
2.17.1
Currently, the play_idle function does not allow specifying which idle
state we want to enter. Improve this by passing the idle state as a
parameter to the function.
Export cpuidle_find_deepest_state() symbol as it is used from the
intel_powerclamp driver as a module.
There are no functional changes: the cpuidle state is still the deepest one.
Signed-off-by: Daniel Lezcano <[email protected]>
Acked-by: Mathieu Poirier <[email protected]>
Reviewed-by: Ulf Hansson <[email protected]>
---
V6:
- Change variable name 'state' -> 'index':
https://lkml.org/lkml/2019/10/28/874
V4:
- Add EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state) for the
intel_powerclamp driver when this one is compiled as a module
V3:
- Add missing cpuidle.h header
---
drivers/cpuidle/cpuidle.c | 1 +
drivers/powercap/idle_inject.c | 4 +++-
drivers/thermal/intel/intel_powerclamp.c | 4 +++-
include/linux/cpu.h | 2 +-
kernel/sched/idle.c | 4 ++--
5 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 18523ea6b11b..b871fc2e8e67 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -126,6 +126,7 @@ int cpuidle_find_deepest_state(void)
return find_deepest_state(drv, dev, UINT_MAX, 0, false);
}
+EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state);
#ifdef CONFIG_SUSPEND
static void enter_s2idle_proper(struct cpuidle_driver *drv,
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index cd1270614cc6..233c878cbf46 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) "ii_dev: " fmt
#include <linux/cpu.h>
+#include <linux/cpuidle.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
#include <linux/sched.h>
@@ -138,7 +139,8 @@ static void idle_inject_fn(unsigned int cpu)
*/
iit->should_run = 0;
- play_idle(READ_ONCE(ii_dev->idle_duration_us));
+ play_idle(READ_ONCE(ii_dev->idle_duration_us),
+ cpuidle_find_deepest_state());
}
/**
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 53216dcbe173..b55786c169ae 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -29,6 +29,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
+#include <linux/cpuidle.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
@@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
if (should_skip)
goto balance;
- play_idle(jiffies_to_usecs(w_data->duration_jiffies));
+ play_idle(jiffies_to_usecs(w_data->duration_jiffies),
+ cpuidle_find_deepest_state());
balance:
if (clamping && w_data->clamping && cpu_online(w_data->cpu))
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d0633ebdaa9c..6f5df0b3641a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -179,7 +179,7 @@ void arch_cpu_idle_dead(void);
int cpu_report_state(int cpu);
int cpu_check_up_prepare(int cpu);
void cpu_set_state_online(int cpu);
-void play_idle(unsigned long duration_us);
+void play_idle(unsigned long duration_us, int index);
#ifdef CONFIG_HOTPLUG_CPU
bool cpu_wait_death(unsigned int cpu, int seconds);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index fb9fc93f1497..eb9e93b37a97 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -314,7 +314,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-void play_idle(unsigned long duration_us)
+void play_idle(unsigned long duration_us, int index)
{
struct idle_timer it;
@@ -331,7 +331,7 @@ void play_idle(unsigned long duration_us)
rcu_sleep_check();
preempt_disable();
current->flags |= PF_IDLE;
- cpuidle_use_state(cpuidle_find_deepest_state());
+ cpuidle_use_state(index);
it.done = 0;
hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
--
2.17.1
Hi Rafael,
On 30/10/2019 08:51, Daniel Lezcano wrote:
> The play_idle function has two users, the intel powerclamp and the
> idle_injection.
>
> The idle injection cooling device uses the function via the
> idle_injection powercap's APIs. Unfortunately, play_idle is currently
> limited by the idle state depth: by default the deepest idle state is
> selected. On the ARM[64] platforms, most of the time it is the cluster
> idle state, the exit latency and the residency can be very high. That
> reduces the scope of the idle injection usage because the impact on
> the performances can be very significant.
>
> If the idle injection cycles can be done with a shallow state like a
> retention state, the cooling effect would eventually give similar
> results than the cpufreq cooling device.
>
> In order to prepare the function to receive an idle state parameter,
> let's replace the 'use_deepest_state' boolean field with 'use_state'
> and use this value to enter the specific idle state.
>
> The current code keeps the default behavior which is go to the deepest
> idle state.
>
> Signed-off-by: Daniel Lezcano <[email protected]>
> Acked-by: Mathieu Poirier <[email protected]>
> Reviewed-by: Ulf Hansson <[email protected]>
Is it possible to merge this series so I can make some progress on
upstreaming the idle cooling device which depends on these three patches?
Thanks
-- Daniel
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On Wednesday, November 6, 2019 7:27:47 PM CET Daniel Lezcano wrote:
>
> Hi Rafael,
>
>
> On 30/10/2019 08:51, Daniel Lezcano wrote:
> > The play_idle function has two users, the intel powerclamp and the
> > idle_injection.
> >
> > The idle injection cooling device uses the function via the
> > idle_injection powercap's APIs. Unfortunately, play_idle is currently
> > limited by the idle state depth: by default the deepest idle state is
> > selected. On the ARM[64] platforms, most of the time it is the cluster
> > idle state, the exit latency and the residency can be very high. That
> > reduces the scope of the idle injection usage because the impact on
> > the performances can be very significant.
> >
> > If the idle injection cycles can be done with a shallow state like a
> > retention state, the cooling effect would eventually give similar
> > results than the cpufreq cooling device.
> >
> > In order to prepare the function to receive an idle state parameter,
> > let's replace the 'use_deepest_state' boolean field with 'use_state'
> > and use this value to enter the specific idle state.
> >
> > The current code keeps the default behavior which is go to the deepest
> > idle state.
> >
> > Signed-off-by: Daniel Lezcano <[email protected]>
> > Acked-by: Mathieu Poirier <[email protected]>
> > Reviewed-by: Ulf Hansson <[email protected]>
>
> Is it possible to merge this series so I can make some progress on
> upstreaming the idle cooling device which depends on these three patches?
That would be possible if the series had no problems, but it appears to have
some.
Let me reply to the patches.
On Wednesday, October 30, 2019 8:51:39 AM CET Daniel Lezcano wrote:
> The play_idle function has two users, the intel powerclamp and the
> idle_injection.
>
> The idle injection cooling device uses the function via the
> idle_injection powercap's APIs. Unfortunately, play_idle is currently
> limited by the idle state depth: by default the deepest idle state is
> selected. On the ARM[64] platforms, most of the time it is the cluster
> idle state, the exit latency and the residency can be very high. That
> reduces the scope of the idle injection usage because the impact on
> the performances can be very significant.
>
> If the idle injection cycles can be done with a shallow state like a
> retention state, the cooling effect would eventually give similar
> results than the cpufreq cooling device.
>
> In order to prepare the function to receive an idle state parameter,
> let's replace the 'use_deepest_state' boolean field with 'use_state'
> and use this value to enter the specific idle state.
>
> The current code keeps the default behavior which is go to the deepest
> idle state.
>
> Signed-off-by: Daniel Lezcano <[email protected]>
> Acked-by: Mathieu Poirier <[email protected]>
> Reviewed-by: Ulf Hansson <[email protected]>
> ---
> V6:
> - Change use_state variable name by use_state_idx:
> https://lkml.org/lkml/2019/10/28/874
> V5:
> - Fix s2idle default idle state value:
> https://lkml.org/lkml/2019/10/15/522
> ---
> drivers/cpuidle/cpuidle.c | 21 +++++++++++----------
> include/linux/cpuidle.h | 13 ++++++-------
> kernel/sched/idle.c | 11 +++++++----
> 3 files changed, 24 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 0895b988fa92..18523ea6b11b 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -99,31 +99,31 @@ static int find_deepest_state(struct cpuidle_driver *drv,
> }
>
> /**
> - * cpuidle_use_deepest_state - Set/clear governor override flag.
> - * @enable: New value of the flag.
> + * cpuidle_use_state - Force the cpuidle framework to enter an idle state.
> + * @state: An integer for an idle state
> *
> - * Set/unset the current CPU to use the deepest idle state (override governors
> - * going forward if set).
> + * Specify an idle state the cpuidle framework must step in and bypass
> + * the idle state selection process.
> */
> -void cpuidle_use_deepest_state(bool enable)
> +void cpuidle_use_state(int index)
> {
> struct cpuidle_device *dev;
>
> preempt_disable();
> dev = cpuidle_get_device();
> if (dev)
> - dev->use_deepest_state = enable;
> + dev->use_state_idx = index;
> preempt_enable();
> }
>
> /**
> * cpuidle_find_deepest_state - Find the deepest available idle state.
> - * @drv: cpuidle driver for the given CPU.
> - * @dev: cpuidle device for the given CPU.
> */
> -int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> +int cpuidle_find_deepest_state(void)
> {
> + struct cpuidle_device *dev = cpuidle_get_device();
> + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
This is totally wasteful for the cpuidle_idle_call() use case.
> +
> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
> }
>
> @@ -554,6 +554,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
> static void __cpuidle_device_init(struct cpuidle_device *dev)
> {
> memset(dev->states_usage, 0, sizeof(dev->states_usage));
> + dev->use_state_idx = CPUIDLE_STATE_NOUSE;
> dev->last_residency = 0;
> dev->next_hrtimer = 0;
> }
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 4b6b5bea8f79..d53ad36cb2de 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -15,6 +15,7 @@
> #include <linux/list.h>
> #include <linux/hrtimer.h>
>
> +#define CPUIDLE_STATE_NOUSE -1
> #define CPUIDLE_STATE_MAX 10
> #define CPUIDLE_NAME_LEN 16
> #define CPUIDLE_DESC_LEN 32
> @@ -80,11 +81,11 @@ struct cpuidle_driver_kobj;
> struct cpuidle_device {
> unsigned int registered:1;
> unsigned int enabled:1;
> - unsigned int use_deepest_state:1;
> unsigned int poll_time_limit:1;
> unsigned int cpu;
> ktime_t next_hrtimer;
>
> + int use_state_idx;
> int last_state_idx;
> int last_residency;
> u64 poll_limit_ns;
> @@ -203,19 +204,17 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
> #endif
>
> #ifdef CONFIG_CPU_IDLE
> -extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> +extern int cpuidle_find_deepest_state(void);
> extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
> struct cpuidle_device *dev);
> -extern void cpuidle_use_deepest_state(bool enable);
> +extern void cpuidle_use_state(int index);
> #else
> -static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> +static inline int cpuidle_find_deepest_state(void)
> {return -ENODEV; }
> static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
> struct cpuidle_device *dev)
> {return -ENODEV; }
> -static inline void cpuidle_use_deepest_state(bool enable)
> +static inline void cpuidle_use_state(int index)
> {
> }
> #endif
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 8dad5aa600ea..fb9fc93f1497 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -165,7 +165,8 @@ static void cpuidle_idle_call(void)
> * until a proper wakeup interrupt happens.
> */
>
> - if (idle_should_enter_s2idle() || dev->use_deepest_state) {
> + if (idle_should_enter_s2idle() ||
> + dev->use_state_idx != CPUIDLE_STATE_NOUSE) {
> if (idle_should_enter_s2idle()) {
> rcu_idle_enter();
>
> @@ -176,12 +177,14 @@ static void cpuidle_idle_call(void)
> }
>
> rcu_idle_exit();
> + next_state = cpuidle_find_deepest_state();
> + } else {
> + next_state = dev->use_state_idx;
> }
>
> tick_nohz_idle_stop_tick();
> rcu_idle_enter();
>
> - next_state = cpuidle_find_deepest_state(drv, dev);
> call_cpuidle(drv, dev, next_state);
> } else {
> bool stop_tick = true;
> @@ -328,7 +331,7 @@ void play_idle(unsigned long duration_us)
> rcu_sleep_check();
> preempt_disable();
> current->flags |= PF_IDLE;
> - cpuidle_use_deepest_state(true);
> + cpuidle_use_state(cpuidle_find_deepest_state());
And this assumes that the deepest state will not change for the whole
play_idle() duration, but what if it is disabled by user space in the
meantime?
>
> it.done = 0;
> hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> @@ -339,7 +342,7 @@ void play_idle(unsigned long duration_us)
> while (!READ_ONCE(it.done))
> do_idle();
>
> - cpuidle_use_deepest_state(false);
> + cpuidle_use_state(CPUIDLE_STATE_NOUSE);
> current->flags &= ~PF_IDLE;
>
> preempt_fold_need_resched();
>
On Wednesday, October 30, 2019 8:51:40 AM CET Daniel Lezcano wrote:
> Currently, the play_idle function does not allow to tell which idle
> state we want to go. Improve this by passing the idle state as
> parameter to the function.
>
> Export cpuidle_find_deepest_state() symbol as it is used from the
> intel_powerclamp driver as a module.
>
> There is no functional changes, the cpuidle state is the deepest one.
>
> Signed-off-by: Daniel Lezcano <[email protected]>
> Acked-by: Mathieu Poirier <[email protected]>
> Reviewed-by: Ulf Hansson <[email protected]>
> ---
> V6:
> - Change variable name 'state' -> 'index':
> https://lkml.org/lkml/2019/10/28/874
> V4:
> - Add EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state) for the
> intel_powerclamp driver when this one is compiled as a module
> V3:
> - Add missing cpuidle.h header
> ---
> drivers/cpuidle/cpuidle.c | 1 +
> drivers/powercap/idle_inject.c | 4 +++-
> drivers/thermal/intel/intel_powerclamp.c | 4 +++-
> include/linux/cpu.h | 2 +-
> kernel/sched/idle.c | 4 ++--
> 5 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 18523ea6b11b..b871fc2e8e67 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -126,6 +126,7 @@ int cpuidle_find_deepest_state(void)
>
> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
> }
> +EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state);
That doesn't appear to be really necessary to me.
>
> #ifdef CONFIG_SUSPEND
> static void enter_s2idle_proper(struct cpuidle_driver *drv,
> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
> index cd1270614cc6..233c878cbf46 100644
> --- a/drivers/powercap/idle_inject.c
> +++ b/drivers/powercap/idle_inject.c
> @@ -38,6 +38,7 @@
> #define pr_fmt(fmt) "ii_dev: " fmt
>
> #include <linux/cpu.h>
> +#include <linux/cpuidle.h>
> #include <linux/hrtimer.h>
> #include <linux/kthread.h>
> #include <linux/sched.h>
> @@ -138,7 +139,8 @@ static void idle_inject_fn(unsigned int cpu)
> */
> iit->should_run = 0;
>
> - play_idle(READ_ONCE(ii_dev->idle_duration_us));
> + play_idle(READ_ONCE(ii_dev->idle_duration_us),
> + cpuidle_find_deepest_state());
The next patch changes this again and I'm not sure why this intermediate
change is useful.
> }
>
> /**
> diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
> index 53216dcbe173..b55786c169ae 100644
> --- a/drivers/thermal/intel/intel_powerclamp.c
> +++ b/drivers/thermal/intel/intel_powerclamp.c
> @@ -29,6 +29,7 @@
> #include <linux/delay.h>
> #include <linux/kthread.h>
> #include <linux/cpu.h>
> +#include <linux/cpuidle.h>
> #include <linux/thermal.h>
> #include <linux/slab.h>
> #include <linux/tick.h>
> @@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
> if (should_skip)
> goto balance;
>
> - play_idle(jiffies_to_usecs(w_data->duration_jiffies));
> + play_idle(jiffies_to_usecs(w_data->duration_jiffies),
> + cpuidle_find_deepest_state());
I don't see a reason for changing the code here like this.
What you really need is to have a way to set a limit on the idle
state exit latency for idle injection on ARM.
For that you can pass the exit latency limit to play_idle(), but then
you need to change powerclamp to pass UINT_MAX or similar which is
ugly, or you can redefine cpuidle_use_deepest_state() to take the
exit latency limit as the arg (with 0 meaning use_deepest_state == false).
In the latter case, it would be quite straightforward to add an
exit_latency argument to cpuidle_find_deepest_state() and note that
find_deepest_state() takes a max_latency arg already, so that would be
a trivial change (hint!).
On Wednesday, October 30, 2019 8:51:41 AM CET Daniel Lezcano wrote:
> Currently the idle injection framework only allows to inject the
> deepest idle state available on the system.
>
> Give the opportunity to specify which idle state we want to inject by
> adding a new function helper to set the state and use it when calling
> play_idle().
>
> Signed-off-by: Daniel Lezcano <[email protected]>
> Acked-by: Mathieu Poirier <[email protected]>
> Reviewed-by: Ulf Hansson <[email protected]>
> ---
> V6:
> - Rename variable name 'state' -> 'state_idx':
> https://lkml.org/lkml/2019/10/28/874
> ---
> drivers/powercap/idle_inject.c | 14 +++++++++++++-
> include/linux/idle_inject.h | 3 +++
> 2 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
> index 233c878cbf46..2607d3e9afc5 100644
> --- a/drivers/powercap/idle_inject.c
> +++ b/drivers/powercap/idle_inject.c
> @@ -66,6 +66,7 @@ struct idle_inject_thread {
> */
> struct idle_inject_device {
> struct hrtimer timer;
> + int state_idx;
> unsigned int idle_duration_us;
> unsigned int run_duration_us;
> unsigned long int cpumask[0];
> @@ -140,7 +141,7 @@ static void idle_inject_fn(unsigned int cpu)
> iit->should_run = 0;
>
> play_idle(READ_ONCE(ii_dev->idle_duration_us),
> - cpuidle_find_deepest_state());
> + READ_ONCE(ii_dev->state_idx));
> }
>
> /**
> @@ -171,6 +172,16 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev,
> *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
> }
>
> +/**
> + * idle_inject_set_state - set the idle state to inject
> + * @state: an integer for the idle state to inject
> + */
> +void idle_inject_set_state(struct idle_inject_device *ii_dev, int index)
> +{
> + if (index >= CPUIDLE_STATE_NOUSE && index < CPUIDLE_STATE_MAX)
> + WRITE_ONCE(ii_dev->state_idx, index);
> +}
So whoever uses this interface, needs to know the idle states list as
provided by the cpuidle driver for the given CPU, but what really needs
to be specified here is the exit latency limit (which can be provided as
a number in us or ns without knowing the state index).
On 08/11/2019 02:20, Rafael J. Wysocki wrote:
> On Wednesday, October 30, 2019 8:51:40 AM CET Daniel Lezcano wrote:
>> Currently, the play_idle function does not allow to tell which idle
>> state we want to go. Improve this by passing the idle state as
>> parameter to the function.
>>
>> Export cpuidle_find_deepest_state() symbol as it is used from the
>> intel_powerclamp driver as a module.
>>
>> There is no functional changes, the cpuidle state is the deepest one.
>>
>> Signed-off-by: Daniel Lezcano <[email protected]>
>> Acked-by: Mathieu Poirier <[email protected]>
>> Reviewed-by: Ulf Hansson <[email protected]>
>> ---
>> V6:
>> - Change variable name 'state' -> 'index':
>> https://lkml.org/lkml/2019/10/28/874
>> V4:
>> - Add EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state) for the
>> intel_powerclamp driver when this one is compiled as a module
>> V3:
>> - Add missing cpuidle.h header
>> ---
>> drivers/cpuidle/cpuidle.c | 1 +
>> drivers/powercap/idle_inject.c | 4 +++-
>> drivers/thermal/intel/intel_powerclamp.c | 4 +++-
>> include/linux/cpu.h | 2 +-
>> kernel/sched/idle.c | 4 ++--
>> 5 files changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>> index 18523ea6b11b..b871fc2e8e67 100644
>> --- a/drivers/cpuidle/cpuidle.c
>> +++ b/drivers/cpuidle/cpuidle.c
>> @@ -126,6 +126,7 @@ int cpuidle_find_deepest_state(void)
>>
>> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
>> }
>> +EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state);
>
> That doesn't appear to be really necessary to me.
>
>>
>> #ifdef CONFIG_SUSPEND
>> static void enter_s2idle_proper(struct cpuidle_driver *drv,
>> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
>> index cd1270614cc6..233c878cbf46 100644
>> --- a/drivers/powercap/idle_inject.c
>> +++ b/drivers/powercap/idle_inject.c
>> @@ -38,6 +38,7 @@
>> #define pr_fmt(fmt) "ii_dev: " fmt
>>
>> #include <linux/cpu.h>
>> +#include <linux/cpuidle.h>
>> #include <linux/hrtimer.h>
>> #include <linux/kthread.h>
>> #include <linux/sched.h>
>> @@ -138,7 +139,8 @@ static void idle_inject_fn(unsigned int cpu)
>> */
>> iit->should_run = 0;
>>
>> - play_idle(READ_ONCE(ii_dev->idle_duration_us));
>> + play_idle(READ_ONCE(ii_dev->idle_duration_us),
>> + cpuidle_find_deepest_state());
>
> The next patch changes this again and I'm not sure why this intermediate
> change is useful.
>
>> }
>>
>> /**
>> diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
>> index 53216dcbe173..b55786c169ae 100644
>> --- a/drivers/thermal/intel/intel_powerclamp.c
>> +++ b/drivers/thermal/intel/intel_powerclamp.c
>> @@ -29,6 +29,7 @@
>> #include <linux/delay.h>
>> #include <linux/kthread.h>
>> #include <linux/cpu.h>
>> +#include <linux/cpuidle.h>
>> #include <linux/thermal.h>
>> #include <linux/slab.h>
>> #include <linux/tick.h>
>> @@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
>> if (should_skip)
>> goto balance;
>>
>> - play_idle(jiffies_to_usecs(w_data->duration_jiffies));
>> + play_idle(jiffies_to_usecs(w_data->duration_jiffies),
>> + cpuidle_find_deepest_state());
>
> I don't see a reason for changing the code here like this.
>
> What you really need is to have a way to set a limit on the idle
> state exit latency for idle injection on ARM.
Mmh, yes you are right. The idle state number is part of the internals
of the cpuidle framework while the exit latency is an input (from user
or kernel).
> For that you can pass the exit latency limit to play_idle(), but then
> you need to change powerclamp to pass UINT_MAX or similar which is
> ugly, or you can redefine cpuidle_use_deepest_state() to take the
> exit latency limit as the arg (with 0 meaning use_deepest_state == false).
Would it make sense to just get the resume latency in
cpuidle_use_deepest_state() and pass the value to find_deepest_state()?
It is the only code path where the constraint is not taken into account
AFAICT.
With this simple change, we can manage everything from the pm_qos API
then and this series is no longer needed.
> In the latter case, it would be quite straightforward to add an
> exit_latency argument to cpuidle_find_deepest_state() and note that
> find_deepest_state() takes a max_latency arg already, so that would be
> a trivial change (hint!).
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On Fri, Nov 8, 2019 at 11:47 AM Daniel Lezcano
<[email protected]> wrote:
>
> On 08/11/2019 02:20, Rafael J. Wysocki wrote:
> > On Wednesday, October 30, 2019 8:51:40 AM CET Daniel Lezcano wrote:
> >> Currently, the play_idle function does not allow to tell which idle
> >> state we want to go. Improve this by passing the idle state as
> >> parameter to the function.
> >>
> >> Export cpuidle_find_deepest_state() symbol as it is used from the
> >> intel_powerclamp driver as a module.
> >>
> >> There is no functional changes, the cpuidle state is the deepest one.
> >>
> >> Signed-off-by: Daniel Lezcano <[email protected]>
> >> Acked-by: Mathieu Poirier <[email protected]>
> >> Reviewed-by: Ulf Hansson <[email protected]>
> >> ---
> >> V6:
> >> - Change variable name 'state' -> 'index':
> >> https://lkml.org/lkml/2019/10/28/874
> >> V4:
> >> - Add EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state) for the
> >> intel_powerclamp driver when this one is compiled as a module
> >> V3:
> >> - Add missing cpuidle.h header
> >> ---
> >> drivers/cpuidle/cpuidle.c | 1 +
> >> drivers/powercap/idle_inject.c | 4 +++-
> >> drivers/thermal/intel/intel_powerclamp.c | 4 +++-
> >> include/linux/cpu.h | 2 +-
> >> kernel/sched/idle.c | 4 ++--
> >> 5 files changed, 10 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> >> index 18523ea6b11b..b871fc2e8e67 100644
> >> --- a/drivers/cpuidle/cpuidle.c
> >> +++ b/drivers/cpuidle/cpuidle.c
> >> @@ -126,6 +126,7 @@ int cpuidle_find_deepest_state(void)
> >>
> >> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
> >> }
> >> +EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state);
> >
> > That doesn't appear to be really necessary to me.
> >
> >>
> >> #ifdef CONFIG_SUSPEND
> >> static void enter_s2idle_proper(struct cpuidle_driver *drv,
> >> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
> >> index cd1270614cc6..233c878cbf46 100644
> >> --- a/drivers/powercap/idle_inject.c
> >> +++ b/drivers/powercap/idle_inject.c
> >> @@ -38,6 +38,7 @@
> >> #define pr_fmt(fmt) "ii_dev: " fmt
> >>
> >> #include <linux/cpu.h>
> >> +#include <linux/cpuidle.h>
> >> #include <linux/hrtimer.h>
> >> #include <linux/kthread.h>
> >> #include <linux/sched.h>
> >> @@ -138,7 +139,8 @@ static void idle_inject_fn(unsigned int cpu)
> >> */
> >> iit->should_run = 0;
> >>
> >> - play_idle(READ_ONCE(ii_dev->idle_duration_us));
> >> + play_idle(READ_ONCE(ii_dev->idle_duration_us),
> >> + cpuidle_find_deepest_state());
> >
> > The next patch changes this again and I'm not sure why this intermediate
> > change is useful.
> >
> >> }
> >>
> >> /**
> >> diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
> >> index 53216dcbe173..b55786c169ae 100644
> >> --- a/drivers/thermal/intel/intel_powerclamp.c
> >> +++ b/drivers/thermal/intel/intel_powerclamp.c
> >> @@ -29,6 +29,7 @@
> >> #include <linux/delay.h>
> >> #include <linux/kthread.h>
> >> #include <linux/cpu.h>
> >> +#include <linux/cpuidle.h>
> >> #include <linux/thermal.h>
> >> #include <linux/slab.h>
> >> #include <linux/tick.h>
> >> @@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
> >> if (should_skip)
> >> goto balance;
> >>
> >> - play_idle(jiffies_to_usecs(w_data->duration_jiffies));
> >> + play_idle(jiffies_to_usecs(w_data->duration_jiffies),
> >> + cpuidle_find_deepest_state());
> >
> > I don't see a reason for changing the code here like this.
> >
> > What you really need is to have a way to set a limit on the idle
> > state exit latency for idle injection on ARM.
>
> Mmh, yes you are right. The idle state number is part of the internals
> of the cpuidle framework while the exit latency is an input (from user
> or kernel).
>
> > For that you can pass the exit latency limit to play_idle(), but then
> > you need to change powerclamp to pass UINT_MAX or similar which is
> > ugly, or you can redefine cpuidle_use_deepest_state() to take the
> > exit latency limit as the arg (with 0 meaning use_deepest_state == false).
>
> Should it make sense to just get the resume latency in
> cpuidle_use_deepest_state() and pass the value to find_deepest_state()?
Yes, I would change cpuidle_use_deepest_state() to take the max exit
latency as the arg (maybe with 0 meaning "don't use the deepest state
only any more").
> It is the only code path where the constraint is not taken into account
> AFAICT.
>
> With this simple change, we can manage everything from the pm_qos API
> then and this series is no longer needed.
OK
On 08/11/2019 11:56, Rafael J. Wysocki wrote:
> On Fri, Nov 8, 2019 at 11:47 AM Daniel Lezcano
> <[email protected]> wrote:
>>
>> On 08/11/2019 02:20, Rafael J. Wysocki wrote:
>>> On Wednesday, October 30, 2019 8:51:40 AM CET Daniel Lezcano wrote:
>>>> Currently, the play_idle function does not allow to tell which idle
>>>> state we want to go. Improve this by passing the idle state as
>>>> parameter to the function.
>>>>
>>>> Export cpuidle_find_deepest_state() symbol as it is used from the
>>>> intel_powerclamp driver as a module.
>>>>
>>>> There is no functional changes, the cpuidle state is the deepest one.
>>>>
>>>> Signed-off-by: Daniel Lezcano <[email protected]>
>>>> Acked-by: Mathieu Poirier <[email protected]>
>>>> Reviewed-by: Ulf Hansson <[email protected]>
>>>> ---
>>>> V6:
>>>> - Change variable name 'state' -> 'index':
>>>> https://lkml.org/lkml/2019/10/28/874
>>>> V4:
>>>> - Add EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state) for the
>>>> intel_powerclamp driver when this one is compiled as a module
>>>> V3:
>>>> - Add missing cpuidle.h header
>>>> ---
>>>> drivers/cpuidle/cpuidle.c | 1 +
>>>> drivers/powercap/idle_inject.c | 4 +++-
>>>> drivers/thermal/intel/intel_powerclamp.c | 4 +++-
>>>> include/linux/cpu.h | 2 +-
>>>> kernel/sched/idle.c | 4 ++--
>>>> 5 files changed, 10 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>>>> index 18523ea6b11b..b871fc2e8e67 100644
>>>> --- a/drivers/cpuidle/cpuidle.c
>>>> +++ b/drivers/cpuidle/cpuidle.c
>>>> @@ -126,6 +126,7 @@ int cpuidle_find_deepest_state(void)
>>>>
>>>> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
>>>> }
>>>> +EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state);
>>>
>>> That doesn't appear to be really necessary to me.
>>>
>>>>
>>>> #ifdef CONFIG_SUSPEND
>>>> static void enter_s2idle_proper(struct cpuidle_driver *drv,
>>>> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
>>>> index cd1270614cc6..233c878cbf46 100644
>>>> --- a/drivers/powercap/idle_inject.c
>>>> +++ b/drivers/powercap/idle_inject.c
>>>> @@ -38,6 +38,7 @@
>>>> #define pr_fmt(fmt) "ii_dev: " fmt
>>>>
>>>> #include <linux/cpu.h>
>>>> +#include <linux/cpuidle.h>
>>>> #include <linux/hrtimer.h>
>>>> #include <linux/kthread.h>
>>>> #include <linux/sched.h>
>>>> @@ -138,7 +139,8 @@ static void idle_inject_fn(unsigned int cpu)
>>>> */
>>>> iit->should_run = 0;
>>>>
>>>> - play_idle(READ_ONCE(ii_dev->idle_duration_us));
>>>> + play_idle(READ_ONCE(ii_dev->idle_duration_us),
>>>> + cpuidle_find_deepest_state());
>>>
>>> The next patch changes this again and I'm not sure why this intermediate
>>> change is useful.
>>>
>>>> }
>>>>
>>>> /**
>>>> diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
>>>> index 53216dcbe173..b55786c169ae 100644
>>>> --- a/drivers/thermal/intel/intel_powerclamp.c
>>>> +++ b/drivers/thermal/intel/intel_powerclamp.c
>>>> @@ -29,6 +29,7 @@
>>>> #include <linux/delay.h>
>>>> #include <linux/kthread.h>
>>>> #include <linux/cpu.h>
>>>> +#include <linux/cpuidle.h>
>>>> #include <linux/thermal.h>
>>>> #include <linux/slab.h>
>>>> #include <linux/tick.h>
>>>> @@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
>>>> if (should_skip)
>>>> goto balance;
>>>>
>>>> - play_idle(jiffies_to_usecs(w_data->duration_jiffies));
>>>> + play_idle(jiffies_to_usecs(w_data->duration_jiffies),
>>>> + cpuidle_find_deepest_state());
>>>
>>> I don't see a reason for changing the code here like this.
>>>
>>> What you really need is to have a way to set a limit on the idle
>>> state exit latency for idle injection on ARM.
>>
>> Mmh, yes you are right. The idle state number is part of the internals
>> of the cpuidle framework while the exit latency is an input (from user
>> or kernel).
>>
>>> For that you can pass the exit latency limit to play_idle(), but then
>>> you need to change powerclamp to pass UINT_MAX or similar which is
>>> ugly, or you can redefine cpuidle_use_deepest_state() to take the
>>> exit latency limit as the arg (with 0 meaning use_deepest_state == false).
>>
>> Should it make sense to just get the resume latency in
>> cpuidle_use_deepest_state() and pass the value to find_deepest_state()?
>
> Yes, I would change cpuidle_use_deepest_state() to take the max exit
> latency as the arg (maybe with 0 meaning "don't use the deepest state
> only any more").
Why not simply:
int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
int latency_req = cpuidle_governor_latency_req(dev->cpu);
return find_deepest_state(drv, dev, latency_req, 0, false);
}
>> It is the only code path where the constraint is not taken into account
>> AFAICT.
>>
>> With this simple change, we can manage everything from the pm_qos API
>> then and this series is no longer needed.
>
> OK
>
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On Fri, Nov 8, 2019 at 2:33 PM Daniel Lezcano <[email protected]> wrote:
>
> On 08/11/2019 11:56, Rafael J. Wysocki wrote:
> > On Fri, Nov 8, 2019 at 11:47 AM Daniel Lezcano
> > <[email protected]> wrote:
> >>
> >> On 08/11/2019 02:20, Rafael J. Wysocki wrote:
> >>> On Wednesday, October 30, 2019 8:51:40 AM CET Daniel Lezcano wrote:
> >>>> Currently, the play_idle function does not allow to tell which idle
> >>>> state we want to go. Improve this by passing the idle state as
> >>>> parameter to the function.
> >>>>
> >>>> Export cpuidle_find_deepest_state() symbol as it is used from the
> >>>> intel_powerclamp driver as a module.
> >>>>
> >>>> There is no functional changes, the cpuidle state is the deepest one.
> >>>>
> >>>> Signed-off-by: Daniel Lezcano <[email protected]>
> >>>> Acked-by: Mathieu Poirier <[email protected]>
> >>>> Reviewed-by: Ulf Hansson <[email protected]>
> >>>> ---
> >>>> V6:
> >>>> - Change variable name 'state' -> 'index':
> >>>> https://lkml.org/lkml/2019/10/28/874
> >>>> V4:
> >>>> - Add EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state) for the
> >>>> intel_powerclamp driver when this one is compiled as a module
> >>>> V3:
> >>>> - Add missing cpuidle.h header
> >>>> ---
> >>>> drivers/cpuidle/cpuidle.c | 1 +
> >>>> drivers/powercap/idle_inject.c | 4 +++-
> >>>> drivers/thermal/intel/intel_powerclamp.c | 4 +++-
> >>>> include/linux/cpu.h | 2 +-
> >>>> kernel/sched/idle.c | 4 ++--
> >>>> 5 files changed, 10 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> >>>> index 18523ea6b11b..b871fc2e8e67 100644
> >>>> --- a/drivers/cpuidle/cpuidle.c
> >>>> +++ b/drivers/cpuidle/cpuidle.c
> >>>> @@ -126,6 +126,7 @@ int cpuidle_find_deepest_state(void)
> >>>>
> >>>> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
> >>>> }
> >>>> +EXPORT_SYMBOL_GPL(cpuidle_find_deepest_state);
> >>>
> >>> That doesn't appear to be really necessary to me.
> >>>
> >>>>
> >>>> #ifdef CONFIG_SUSPEND
> >>>> static void enter_s2idle_proper(struct cpuidle_driver *drv,
> >>>> diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
> >>>> index cd1270614cc6..233c878cbf46 100644
> >>>> --- a/drivers/powercap/idle_inject.c
> >>>> +++ b/drivers/powercap/idle_inject.c
> >>>> @@ -38,6 +38,7 @@
> >>>> #define pr_fmt(fmt) "ii_dev: " fmt
> >>>>
> >>>> #include <linux/cpu.h>
> >>>> +#include <linux/cpuidle.h>
> >>>> #include <linux/hrtimer.h>
> >>>> #include <linux/kthread.h>
> >>>> #include <linux/sched.h>
> >>>> @@ -138,7 +139,8 @@ static void idle_inject_fn(unsigned int cpu)
> >>>> */
> >>>> iit->should_run = 0;
> >>>>
> >>>> - play_idle(READ_ONCE(ii_dev->idle_duration_us));
> >>>> + play_idle(READ_ONCE(ii_dev->idle_duration_us),
> >>>> + cpuidle_find_deepest_state());
> >>>
> >>> The next patch changes this again and I'm not sure why this intermediate
> >>> change is useful.
> >>>
> >>>> }
> >>>>
> >>>> /**
> >>>> diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
> >>>> index 53216dcbe173..b55786c169ae 100644
> >>>> --- a/drivers/thermal/intel/intel_powerclamp.c
> >>>> +++ b/drivers/thermal/intel/intel_powerclamp.c
> >>>> @@ -29,6 +29,7 @@
> >>>> #include <linux/delay.h>
> >>>> #include <linux/kthread.h>
> >>>> #include <linux/cpu.h>
> >>>> +#include <linux/cpuidle.h>
> >>>> #include <linux/thermal.h>
> >>>> #include <linux/slab.h>
> >>>> #include <linux/tick.h>
> >>>> @@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
> >>>> if (should_skip)
> >>>> goto balance;
> >>>>
> >>>> - play_idle(jiffies_to_usecs(w_data->duration_jiffies));
> >>>> + play_idle(jiffies_to_usecs(w_data->duration_jiffies),
> >>>> + cpuidle_find_deepest_state());
> >>>
> >>> I don't see a reason for changing the code here like this.
> >>>
> >>> What you really need is to have a way to set a limit on the idle
> >>> state exit latency for idle injection on ARM.
> >>
> >> Mmh, yes you are right. The idle state number is part of the internals
> >> of the cpuidle framework while the exit latency is an input (from user
> >> or kernel).
> >>
> >>> For that you can pass the exit latency limit to play_idle(), but then
> >>> you need to change powerclamp to pass UINT_MAX or similar which is
> >>> ugly, or you can redefine cpuidle_use_deepest_state() to take the
> >>> exit latency limit as the arg (with 0 meaning use_deepest_state == false).
> >>
> >> Should it make sense to just get the resume latency in
> >> cpuidle_use_deepest_state() and pass the value to find_deepest_state()?
> >
> > Yes, I would change cpuidle_use_deepest_state() to take the max exit
> > latency as the arg (maybe with 0 meaning "don't use the deepest state
> > only any more").
>
> Why not simply ?
>
> int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> struct cpuidle_device *dev)
> {
> int latency = cpuidle_governor_latency_req(dev->cpu);
>
> return find_deepest_state(drv, dev, latency_req, 0, false);
> }
Because, AFAICS, that doesn't work for powerclamp.
On 08/11/2019 02:20, Rafael J. Wysocki wrote:
> On Wednesday, October 30, 2019 8:51:40 AM CET Daniel Lezcano wrote:
>> Currently, the play_idle function does not allow to tell which idle
>> state we want to go. Improve this by passing the idle state as
>> parameter to the function.
>>
>> Export cpuidle_find_deepest_state() symbol as it is used from the
>> intel_powerclamp driver as a module.
[ ... ]
>> /**
>> diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
>> index 53216dcbe173..b55786c169ae 100644
>> --- a/drivers/thermal/intel/intel_powerclamp.c
>> +++ b/drivers/thermal/intel/intel_powerclamp.c
>> @@ -29,6 +29,7 @@
>> #include <linux/delay.h>
>> #include <linux/kthread.h>
>> #include <linux/cpu.h>
>> +#include <linux/cpuidle.h>
>> #include <linux/thermal.h>
>> #include <linux/slab.h>
>> #include <linux/tick.h>
>> @@ -430,7 +431,8 @@ static void clamp_idle_injection_func(struct kthread_work *work)
>> if (should_skip)
>> goto balance;
>>
>> - play_idle(jiffies_to_usecs(w_data->duration_jiffies));
>> + play_idle(jiffies_to_usecs(w_data->duration_jiffies),
>> + cpuidle_find_deepest_state());
>
> I don't see a reason for changing the code here like this.
>
> What you really need is to have a way to set a limit on the idle
> state exit latency for idle injection on ARM.
>
> For that you can pass the exit latency limit to play_idle(), but then
> you need to change powerclamp to pass UINT_MAX or similar which is
> ugly, or you can redefine cpuidle_use_deepest_state() to take the
> exit latency limit as the arg (with 0 meaning use_deepest_state == false).
I'm confused by the "... ugly, *or* ...". In any case we have to
specify a latency constraint to play_idle(), no?
> In the latter case, it would be quite straightforward to add an
> exit_latency argument to cpuidle_find_deepest_state() and note that
> find_deepest_state() takes a max_latency arg already, so that would be
> a trivial change (hint!).
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
hi Daniel,
Since there have been quite a few changes in cpuidle recently, what are your
plans for moving this patchset forward? I, at least, need it.
thanks for that,
martin
On 05/12/2019 18:04, Martin Kepplinger wrote:
> hi Daniel,
>
> Since there's been quite some changes in cpuidle recently, how's your plans
> to move this patchset forward? I, at least, need it.
This series was merged but instead of specifying an idle state, we
specify an exit latency [1].
The cooling device itself is at V4 with some review tags. It should be
merged soon [2].
-- Daniel
[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c55b51a06b01d67a99457bb82a8c31081c7faa23
[2] https://lkml.org/lkml/2019/12/4/563
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog