2014-10-20 16:25:52

by Daniel Lezcano

Subject: [PATCH 1/5] sched: idle: cpuidle: Check the latency req before idle

When the PM QoS latency requirement is set to zero, it means "poll in all
cases".

That is correctly implemented on x86 but not on the other architectures.

As the code is currently written, if the latency request is zero the governor
returns index zero, which corresponds to the poll function on x86 but to the
default idle function on the other architectures. For example, on ARM that is
wait-for-interrupt, which has a latency of '1' and therefore violates the
constraint.

In order to fix that, do the latency requirement check *before* calling the
cpuidle framework, so we jump to the poll function without entering cpuidle
at all. That has several benefits:

1. It clarifies and unifies the code
2. It fixes the x86 vs other architectures behavior
3. It factors out the call to the same poll function
4. It prevents entering the cpuidle framework with its expensive selection
   cost

As latency_req is needed in all cases, change the select API to take
latency_req as a parameter; by the time select is called it is guaranteed to
be non-zero.

As a positive side effect, the latency constraint is now passed in from the
caller, which is one more step toward the cpuidle/scheduler integration.

Signed-off-by: Daniel Lezcano <[email protected]>
---
drivers/cpuidle/cpuidle.c | 5 +++--
drivers/cpuidle/governors/ladder.c | 9 +--------
drivers/cpuidle/governors/menu.c | 8 ++------
include/linux/cpuidle.h | 7 ++++---
kernel/sched/idle.c | 18 ++++++++++++++----
5 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index ee9df5e..372c36f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -158,7 +158,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
*
* Returns the index of the idle state.
*/
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int latency_req)
{
if (off || !initialized)
return -ENODEV;
@@ -169,7 +170,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
if (unlikely(use_deepest_state))
return cpuidle_find_deepest_state(drv, dev);

- return cpuidle_curr_governor->select(drv, dev);
+ return cpuidle_curr_governor->select(drv, dev, latency_req);
}

/**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 044ee0d..18f0da9 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -64,18 +64,11 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
* @dev: the CPU
*/
static int ladder_select_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev, int latency_req)
{
struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
struct ladder_device_state *last_state;
int last_residency, last_idx = ldev->last_state_idx;
- int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
-
- /* Special case when user has set very strict latency requirement */
- if (unlikely(latency_req == 0)) {
- ladder_do_selection(ldev, last_idx, 0);
- return 0;
- }

last_state = &ldev->states[last_idx];

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 34db2fb..96f8fb0 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -287,10 +287,10 @@ again:
* @drv: cpuidle driver containing state data
* @dev: the CPU
*/
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int latency_req)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
- int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
int i;
unsigned int interactivity_req;
unsigned long nr_iowaiters, cpu_load;
@@ -302,10 +302,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)

data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;

- /* Special case when user has set very strict latency requirement */
- if (unlikely(latency_req == 0))
- return 0;
-
/* determine the expected residency time, round up */
data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 25e0df6..fb465c1 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -122,7 +122,7 @@ struct cpuidle_driver {
extern void disable_cpuidle(void);

extern int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev, int latency_req);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -150,7 +150,7 @@ extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
#else
static inline void disable_cpuidle(void) { }
static inline int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev, int latency_req)
{return -ENODEV; }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
@@ -205,7 +205,8 @@ struct cpuidle_governor {
struct cpuidle_device *dev);

int (*select) (struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ int latency_req);
void (*reflect) (struct cpuidle_device *dev, int index);

struct module *owner;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 11e7bc4..25ba94d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -5,6 +5,7 @@
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
+#include <linux/pm_qos.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>

@@ -74,7 +75,7 @@ void __weak arch_cpu_idle(void)
* set, and it returns with polling set. If it ever stops polling, it
* must clear the polling bit.
*/
-static void cpuidle_idle_call(void)
+static void cpuidle_idle_call(unsigned int latency_req)
{
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
@@ -107,7 +108,7 @@ static void cpuidle_idle_call(void)
* Ask the cpuidle framework to choose a convenient idle state.
* Fall back to the default arch idle method on errors.
*/
- next_state = cpuidle_select(drv, dev);
+ next_state = cpuidle_select(drv, dev, latency_req);
if (next_state < 0) {
use_default:
/*
@@ -182,6 +183,8 @@ exit_idle:
*/
static void cpu_idle_loop(void)
{
+ unsigned int latency_req;
+
while (1) {
/*
* If the arch has a polling bit, we maintain an invariant:
@@ -205,19 +208,26 @@ static void cpu_idle_loop(void)
local_irq_disable();
arch_cpu_idle_enter();

+ latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+
/*
* In poll mode we reenable interrupts and spin.
*
+ * If the latency req is zero, we don't want to
+ * enter any idle state and we jump to the poll
+ * function directly
+ *
* Also if we detected in the wakeup from idle
* path that the tick broadcast device expired
* for us, we don't want to go deep idle as we
* know that the IPI is going to arrive right
* away
*/
- if (cpu_idle_force_poll || tick_check_broadcast_expired())
+ if (!latency_req || cpu_idle_force_poll ||
+ tick_check_broadcast_expired())
cpu_idle_poll();
else
- cpuidle_idle_call();
+ cpuidle_idle_call(latency_req);

arch_cpu_idle_exit();
}
--
1.9.1
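
For context, the zero-latency situation this patch handles can be pinned from
user space through the PM QoS misc device: a process writes a latency of 0 to
/dev/cpu_dma_latency and keeps the file descriptor open for as long as the
constraint must hold. A minimal stand-alone sketch (not part of the patch):

/* Hold the PM_QOS_CPU_DMA_LATENCY constraint at 0us, i.e. the
 * "poll in all cases" situation discussed above.  The request stays
 * active for as long as the file descriptor remains open. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int32_t latency_us = 0;    /* 0 means "no idle state, just poll" */
        int fd = open("/dev/cpu_dma_latency", O_WRONLY);

        if (fd < 0) {
                perror("open /dev/cpu_dma_latency");
                return 1;
        }
        if (write(fd, &latency_us, sizeof(latency_us)) != sizeof(latency_us)) {
                perror("write");
                close(fd);
                return 1;
        }
        pause();        /* closing the fd drops the request */
        close(fd);
        return 0;
}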


2014-10-20 16:25:57

by Daniel Lezcano

Subject: [PATCH 2/5] sched: idle: Get the next timer event and pass it to the cpuidle framework

Following the logic of the previous patch, retrieve from the idle task the
expected timer sleep duration and pass it to the cpuidle framework.

Take the opportunity to remove the unused headers in the menu.c file.

This patch does not change the current behavior.

Signed-off-by: Daniel Lezcano <[email protected]>
---
drivers/cpuidle/cpuidle.c | 11 +++++------
drivers/cpuidle/governors/ladder.c | 3 ++-
drivers/cpuidle/governors/menu.c | 8 ++------
include/linux/cpuidle.h | 8 +++++---
kernel/sched/idle.c | 16 ++++++++++++----
5 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 372c36f..64f5800 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,16 +8,12 @@
* This code is licenced under the GPL.
*/

-#include <linux/clockchips.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
-#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
-#include <linux/ktime.h>
-#include <linux/hrtimer.h>
#include <linux/module.h>
#include <trace/events/power.h>

@@ -155,11 +151,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
*
* @drv: the cpuidle driver
* @dev: the cpuidle device
+ * @latency_req: the latency constraint when choosing an idle state
+ * @next_timer_event: the duration until the timer expires
*
* Returns the index of the idle state.
*/
int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
- int latency_req)
+ int latency_req, int next_timer_event)
{
if (off || !initialized)
return -ENODEV;
@@ -170,7 +168,8 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (unlikely(use_deepest_state))
return cpuidle_find_deepest_state(drv, dev);

- return cpuidle_curr_governor->select(drv, dev, latency_req);
+ return cpuidle_curr_governor->select(drv, dev, latency_req,
+ next_timer_event);
}

/**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 18f0da9..fb396d6 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -64,7 +64,8 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
* @dev: the CPU
*/
static int ladder_select_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int latency_req)
+ struct cpuidle_device *dev,
+ int latency_req, int next_timer_event)
{
struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
struct ladder_device_state *last_state;
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 96f8fb0..a17515f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -13,10 +13,6 @@
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/pm_qos.h>
-#include <linux/time.h>
-#include <linux/ktime.h>
-#include <linux/hrtimer.h>
-#include <linux/tick.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/module.h>
@@ -288,7 +284,7 @@ again:
* @dev: the CPU
*/
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
- int latency_req)
+ int latency_req, int next_timer_event)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
int i;
@@ -303,7 +299,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;

/* determine the expected residency time, round up */
- data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+ data->next_timer_us = next_timer_event;

get_iowait_load(&nr_iowaiters, &cpu_load);
data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index fb465c1..d477746 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -122,7 +122,8 @@ struct cpuidle_driver {
extern void disable_cpuidle(void);

extern int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int latency_req);
+ struct cpuidle_device *dev,
+ int latency_req, int next_timer_event);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -150,7 +151,8 @@ extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
#else
static inline void disable_cpuidle(void) { }
static inline int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int latency_req)
+ struct cpuidle_device *dev,
+ int latency_req, int next_timer_event)
{return -ENODEV; }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
@@ -206,7 +208,7 @@ struct cpuidle_governor {

int (*select) (struct cpuidle_driver *drv,
struct cpuidle_device *dev,
- int latency_req);
+ int latency_req, int next_timer_event);
void (*reflect) (struct cpuidle_device *dev, int index);

struct module *owner;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 25ba94d..f439161 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -75,7 +75,8 @@ void __weak arch_cpu_idle(void)
* set, and it returns with polling set. If it ever stops polling, it
* must clear the polling bit.
*/
-static void cpuidle_idle_call(unsigned int latency_req)
+static void cpuidle_idle_call(unsigned int latency_req,
+ unsigned int next_timer_event)
{
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
@@ -108,7 +109,7 @@ static void cpuidle_idle_call(unsigned int latency_req)
* Ask the cpuidle framework to choose a convenient idle state.
* Fall back to the default arch idle method on errors.
*/
- next_state = cpuidle_select(drv, dev, latency_req);
+ next_state = cpuidle_select(drv, dev, latency_req, next_timer_event);
if (next_state < 0) {
use_default:
/*
@@ -183,7 +184,7 @@ exit_idle:
*/
static void cpu_idle_loop(void)
{
- unsigned int latency_req;
+ unsigned int latency_req, next_timer_event;

while (1) {
/*
@@ -211,6 +212,12 @@ static void cpu_idle_loop(void)
latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);

/*
+ * The next timer event in us
+ */
+ next_timer_event = ktime_to_us(
+ tick_nohz_get_sleep_length());
+
+ /*
* In poll mode we reenable interrupts and spin.
*
* If the latency req is zero, we don't want to
@@ -227,7 +234,8 @@ static void cpu_idle_loop(void)
tick_check_broadcast_expired())
cpu_idle_poll();
else
- cpuidle_idle_call(latency_req);
+ cpuidle_idle_call(latency_req,
+ next_timer_event);

arch_cpu_idle_exit();
}
--
1.9.1
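
To make the new select() contract concrete, here is a self-contained sketch
(not the actual menu or ladder logic) of roughly what the two parameters
constrain: a state is only eligible if its exit latency fits latency_req and
its target residency fits the time until the next timer event, and the
deepest eligible state wins. The state table below is made up.

#include <stdio.h>

struct state {
        const char *name;
        unsigned int exit_latency;      /* us */
        unsigned int target_residency;  /* us */
};

/* pick the deepest state satisfying both constraints, -1 if none */
static int select_state(const struct state *states, int count,
                        int latency_req, int next_timer_event)
{
        int i, idx = -1;

        for (i = 0; i < count; i++) {
                if (states[i].exit_latency > (unsigned int)latency_req)
                        continue;
                if (states[i].target_residency > (unsigned int)next_timer_event)
                        continue;
                idx = i;        /* deeper states come later in the table */
        }
        return idx;
}

int main(void)
{
        const struct state states[] = {
                { "WFI",        1,   1    },
                { "retention",  50,  200  },
                { "power-down", 500, 2000 },
        };
        int idx = select_state(states, 3, 100, 5000);

        printf("selected: %s\n", idx >= 0 ? states[idx].name : "none");
        return 0;
}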

2014-10-20 16:26:02

by Daniel Lezcano

Subject: [PATCH 5/5] cpuidle: menu: Move the update function before its declaration

In order to prevent a pointless forward declaration, just move the function
to the beginning of the file.

This patch does not change the behavior of the governor; it is just code
reordering.

Signed-off-by: Daniel Lezcano <[email protected]>
---
drivers/cpuidle/governors/menu.c | 149 +++++++++++++++++++--------------------
1 file changed, 74 insertions(+), 75 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 6ae8390..0ac76b1 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -184,7 +184,6 @@ static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned lo

static DEFINE_PER_CPU(struct menu_device, menu_devices);

-static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);

/* This implements DIV_ROUND_CLOSEST but avoids 64 bit division */
static u64 div_round64(u64 dividend, u32 divisor)
@@ -192,6 +191,80 @@ static u64 div_round64(u64 dividend, u32 divisor)
return div_u64(dividend + (divisor / 2), divisor);
}

+/**
+ * menu_update - attempts to guess what happened after entry
+ * @drv: cpuidle driver containing state data
+ * @dev: the CPU
+ */
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ struct menu_device *data = &__get_cpu_var(menu_devices);
+ int last_idx = data->last_state_idx;
+ struct cpuidle_state *target = &drv->states[last_idx];
+ unsigned int measured_us;
+ unsigned int new_factor;
+
+ /*
+ * Try to figure out how much time passed between entry to low
+ * power state and occurrence of the wakeup event.
+ *
+ * If the entered idle state didn't support residency measurements,
+ * we are basically lost in the dark how much time passed.
+ * As a compromise, assume we slept for the whole expected time.
+ *
+ * Any measured amount of time will include the exit latency.
+ * Since we are interested in when the wakeup begun, not when it
+ * was completed, we must subtract the exit latency. However, if
+ * the measured amount of time is less than the exit latency,
+ * assume the state was never reached and the exit latency is 0.
+ */
+ if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) {
+ /* Use timer value as is */
+ measured_us = data->next_timer_us;
+
+ } else {
+ /* Use measured value */
+ measured_us = cpuidle_get_last_residency(dev);
+
+ /* Deduct exit latency */
+ if (measured_us > target->exit_latency)
+ measured_us -= target->exit_latency;
+
+ /* Make sure our coefficients do not exceed unity */
+ if (measured_us > data->next_timer_us)
+ measured_us = data->next_timer_us;
+ }
+
+ /* Update our correction ratio */
+ new_factor = data->correction_factor[data->bucket];
+ new_factor -= new_factor / DECAY;
+
+ if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
+ new_factor += RESOLUTION * measured_us / data->next_timer_us;
+ else
+ /*
+ * we were idle so long that we count it as a perfect
+ * prediction
+ */
+ new_factor += RESOLUTION;
+
+ /*
+ * We don't want 0 as factor; we always want at least
+ * a tiny bit of estimated time. Fortunately, due to rounding,
+ * new_factor will stay nonzero regardless of measured_us values
+ * and the compiler can eliminate this test as long as DECAY > 1.
+ */
+ if (DECAY == 1 && unlikely(new_factor == 0))
+ new_factor = 1;
+
+ data->correction_factor[data->bucket] = new_factor;
+
+ /* update the repeating-pattern data */
+ data->intervals[data->interval_ptr++] = measured_us;
+ if (data->interval_ptr >= INTERVALS)
+ data->interval_ptr = 0;
+}
+
/*
* Try detecting repeating patterns by keeping track of the last 8
* intervals, and checking if the standard deviation of that set
@@ -378,80 +451,6 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
}

/**
- * menu_update - attempts to guess what happened after entry
- * @drv: cpuidle driver containing state data
- * @dev: the CPU
- */
-static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
-{
- struct menu_device *data = &__get_cpu_var(menu_devices);
- int last_idx = data->last_state_idx;
- struct cpuidle_state *target = &drv->states[last_idx];
- unsigned int measured_us;
- unsigned int new_factor;
-
- /*
- * Try to figure out how much time passed between entry to low
- * power state and occurrence of the wakeup event.
- *
- * If the entered idle state didn't support residency measurements,
- * we are basically lost in the dark how much time passed.
- * As a compromise, assume we slept for the whole expected time.
- *
- * Any measured amount of time will include the exit latency.
- * Since we are interested in when the wakeup begun, not when it
- * was completed, we must subtract the exit latency. However, if
- * the measured amount of time is less than the exit latency,
- * assume the state was never reached and the exit latency is 0.
- */
- if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) {
- /* Use timer value as is */
- measured_us = data->next_timer_us;
-
- } else {
- /* Use measured value */
- measured_us = cpuidle_get_last_residency(dev);
-
- /* Deduct exit latency */
- if (measured_us > target->exit_latency)
- measured_us -= target->exit_latency;
-
- /* Make sure our coefficients do not exceed unity */
- if (measured_us > data->next_timer_us)
- measured_us = data->next_timer_us;
- }
-
- /* Update our correction ratio */
- new_factor = data->correction_factor[data->bucket];
- new_factor -= new_factor / DECAY;
-
- if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
- new_factor += RESOLUTION * measured_us / data->next_timer_us;
- else
- /*
- * we were idle so long that we count it as a perfect
- * prediction
- */
- new_factor += RESOLUTION;
-
- /*
- * We don't want 0 as factor; we always want at least
- * a tiny bit of estimated time. Fortunately, due to rounding,
- * new_factor will stay nonzero regardless of measured_us values
- * and the compiler can eliminate this test as long as DECAY > 1.
- */
- if (DECAY == 1 && unlikely(new_factor == 0))
- new_factor = 1;
-
- data->correction_factor[data->bucket] = new_factor;
-
- /* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = measured_us;
- if (data->interval_ptr >= INTERVALS)
- data->interval_ptr = 0;
-}
-
-/**
* menu_enable_device - scans a CPU's states and does setup
* @drv: cpuidle driver
* @dev: the CPU
--
1.9.1
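
As background for the function being moved: the correction factor maintained
by menu_update() is a decaying fixed-point ratio between the measured idle
time and the predicted one; menu_select() later scales its prediction by this
factor and divides by RESOLUTION * DECAY. A stand-alone sketch of just the
update step, assuming the governor's constants RESOLUTION = 1024 and
DECAY = 8, with made-up inputs:

#include <stdio.h>

#define RESOLUTION      1024
#define DECAY           8

/* the update performed on correction_factor[bucket] in menu_update() */
static unsigned int update_factor(unsigned int factor,
                                  unsigned int measured_us,
                                  unsigned int next_timer_us)
{
        factor -= factor / DECAY;       /* age the old estimate */
        factor += RESOLUTION * measured_us / next_timer_us;
        return factor;
}

int main(void)
{
        unsigned int factor = RESOLUTION * DECAY;       /* ratio of 1.0 */
        int i;

        /* repeatedly sleeping only half of the predicted time ... */
        for (i = 0; i < 10; i++)
                factor = update_factor(factor, 500, 1000);

        /* ... decays the factor towards RESOLUTION * DECAY / 2 */
        printf("factor after 10 updates: %u (1.0 == %d)\n",
               factor, RESOLUTION * DECAY);
        return 0;
}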

2014-10-20 16:26:37

by Daniel Lezcano

Subject: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

In the current code, the check to reflect or not the outcoming state is done
against the idle state which has been choose and its value.

Instead of doing a check in each of the reflect functions, just don't call reflect
if something went wrong in the idle path.

Signed-off-by: Daniel Lezcano <[email protected]>
---
drivers/cpuidle/governors/ladder.c | 3 +--
drivers/cpuidle/governors/menu.c | 4 +---
kernel/sched/idle.c | 3 ++-
3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index fb396d6..c0b36a8 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -165,8 +165,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
static void ladder_reflect(struct cpuidle_device *dev, int index)
{
struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
- if (index > 0)
- ldev->last_state_idx = index;
+ ldev->last_state_idx = index;
}

static struct cpuidle_governor ladder_governor = {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index a17515f..3907301 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -365,9 +365,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
static void menu_reflect(struct cpuidle_device *dev, int index)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
- data->last_state_idx = index;
- if (index >= 0)
- data->needs_update = 1;
+ data->needs_update = 1;
}

/**
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f439161..9ac7322 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -162,7 +162,8 @@ use_default:
/*
* Give the governor an opportunity to reflect on the outcome
*/
- cpuidle_reflect(dev, entered_state);
+ if (entered_state >= 0)
+ cpuidle_reflect(dev, entered_state);

exit_idle:
__current_set_polling();
--
1.9.1

2014-10-20 16:26:59

by Daniel Lezcano

Subject: [PATCH 4/5] cpuidle: menu: Fix the get_typical_interval

The first time 'get_typical_interval' is called, it computes an average of
zero as no data has been filled in yet. That leads to the 'data->predicted_us'
variable being set to zero too.

The caller, 'menu_select', will then do:

interactivity_req = data->predicted_us /
	performance_multiplier(nr_iowaiters, cpu_load);

That sets interactivity_req to zero (0 / performance...).

and then

if (latency_req > interactivity_req)
	latency_req = interactivity_req;

... setting 'latency_req' to zero too.

No idle state will fulfill this constraint, so we fall back to the C1 state by
default, which leads to an update. The subsequent calls will therefore compute
an average different from zero.

Even though that works with the current code, the semantics are broken, and it
will break with the next patches where the latency check is stricter: the
first check will fail (latency_req is zero), then no update will occur, so we
will always fail to choose an idle state.

As there are no previous values, it is pointless to compute a standard
deviation for this non-existent data. Just return without setting
'data->predicted_us' to zero.

Signed-off-by: Daniel Lezcano <[email protected]>
---
drivers/cpuidle/governors/menu.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 3907301..6ae8390 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -226,6 +226,15 @@ again:
else
do_div(avg, divisor);

+ /*
+ * We are at the very beginning and no data have been filled
+ * yet. Let's skip the standard deviation computation
+ * otherwise the data->predicted_us will be zero and that will
+ * lead to a zero latency req in the select function
+ */
+ if (!avg)
+ return;
+
/* Then try to determine standard deviation */
stddev = 0;
for (i = 0; i < INTERVALS; i++) {
--
1.9.1
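
The cascade described above is easy to reproduce outside the kernel: a zero
prediction makes interactivity_req zero, and the clamp then drags latency_req
down to zero on the very first call. A stand-alone sketch with made-up
numbers:

#include <stdio.h>

int main(void)
{
        unsigned int predicted_us = 0;          /* average of an empty history */
        unsigned int performance_mult = 10;     /* some non-zero multiplier */
        int latency_req = 2000;                 /* PM QoS allows 2000us */

        unsigned int interactivity_req = predicted_us / performance_mult;

        if (latency_req > (int)interactivity_req)
                latency_req = interactivity_req;

        printf("latency_req ends up as %d\n", latency_req);    /* prints 0 */
        return 0;
}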

2014-10-22 20:30:11

by Nicolas Pitre

Subject: Re: [PATCH 1/5] sched: idle: cpuidle: Check the latency req before idle

On Mon, 20 Oct 2014, Daniel Lezcano wrote:

> When the pmqos latency requirement is set to zero that means "poll in all the
> cases".
>
> That is correctly implemented on x86 but not on the other archs.
>
> As how is written the code, if the latency request is zero, the governor will
> return zero, so corresponding, for x86, to the poll function, but for the
> others arch the default idle function. For example, on ARM this is wait-for-
> interrupt with a latency of '1', so violating the constraint.
>
> In order to fix that, do the latency requirement check *before* calling the
> cpuidle framework in order to jump to the poll function without entering
> cpuidle. That has several benefits:
>
> 1. It clarifies and unifies the code
> 2. It fixes x86 vs other archs behavior
> 3. Factors out the call to the same function
> 4. Prevent to enter the cpuidle framework with its expensive cost in
> calculation
>
> As the latency_req is needed in all the cases, change the select API to take
> the latency_req as parameter in case it is not equal to zero.
>
> As a positive side effect, it introduces the latency constraint specified
> externally, so one more step to the cpuidle/scheduler integration.
>
> Signed-off-by: Daniel Lezcano <[email protected]>

Acked-by: Nicolas Pitre <[email protected]>

> ---
> drivers/cpuidle/cpuidle.c | 5 +++--
> drivers/cpuidle/governors/ladder.c | 9 +--------
> drivers/cpuidle/governors/menu.c | 8 ++------
> include/linux/cpuidle.h | 7 ++++---
> kernel/sched/idle.c | 18 ++++++++++++++----
> 5 files changed, 24 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index ee9df5e..372c36f 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -158,7 +158,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> *
> * Returns the index of the idle state.
> */
> -int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> + int latency_req)
> {
> if (off || !initialized)
> return -ENODEV;
> @@ -169,7 +170,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> if (unlikely(use_deepest_state))
> return cpuidle_find_deepest_state(drv, dev);
>
> - return cpuidle_curr_governor->select(drv, dev);
> + return cpuidle_curr_governor->select(drv, dev, latency_req);
> }
>
> /**
> diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
> index 044ee0d..18f0da9 100644
> --- a/drivers/cpuidle/governors/ladder.c
> +++ b/drivers/cpuidle/governors/ladder.c
> @@ -64,18 +64,11 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
> * @dev: the CPU
> */
> static int ladder_select_state(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> + struct cpuidle_device *dev, int latency_req)
> {
> struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
> struct ladder_device_state *last_state;
> int last_residency, last_idx = ldev->last_state_idx;
> - int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> -
> - /* Special case when user has set very strict latency requirement */
> - if (unlikely(latency_req == 0)) {
> - ladder_do_selection(ldev, last_idx, 0);
> - return 0;
> - }
>
> last_state = &ldev->states[last_idx];
>
> diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
> index 34db2fb..96f8fb0 100644
> --- a/drivers/cpuidle/governors/menu.c
> +++ b/drivers/cpuidle/governors/menu.c
> @@ -287,10 +287,10 @@ again:
> * @drv: cpuidle driver containing state data
> * @dev: the CPU
> */
> -static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> + int latency_req)
> {
> struct menu_device *data = &__get_cpu_var(menu_devices);
> - int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> int i;
> unsigned int interactivity_req;
> unsigned long nr_iowaiters, cpu_load;
> @@ -302,10 +302,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
>
> data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
>
> - /* Special case when user has set very strict latency requirement */
> - if (unlikely(latency_req == 0))
> - return 0;
> -
> /* determine the expected residency time, round up */
> data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
>
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 25e0df6..fb465c1 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -122,7 +122,7 @@ struct cpuidle_driver {
> extern void disable_cpuidle(void);
>
> extern int cpuidle_select(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> + struct cpuidle_device *dev, int latency_req);
> extern int cpuidle_enter(struct cpuidle_driver *drv,
> struct cpuidle_device *dev, int index);
> extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
> @@ -150,7 +150,7 @@ extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
> #else
> static inline void disable_cpuidle(void) { }
> static inline int cpuidle_select(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> + struct cpuidle_device *dev, int latency_req)
> {return -ENODEV; }
> static inline int cpuidle_enter(struct cpuidle_driver *drv,
> struct cpuidle_device *dev, int index)
> @@ -205,7 +205,8 @@ struct cpuidle_governor {
> struct cpuidle_device *dev);
>
> int (*select) (struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> + struct cpuidle_device *dev,
> + int latency_req);
> void (*reflect) (struct cpuidle_device *dev, int index);
>
> struct module *owner;
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 11e7bc4..25ba94d 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -5,6 +5,7 @@
> #include <linux/cpu.h>
> #include <linux/cpuidle.h>
> #include <linux/tick.h>
> +#include <linux/pm_qos.h>
> #include <linux/mm.h>
> #include <linux/stackprotector.h>
>
> @@ -74,7 +75,7 @@ void __weak arch_cpu_idle(void)
> * set, and it returns with polling set. If it ever stops polling, it
> * must clear the polling bit.
> */
> -static void cpuidle_idle_call(void)
> +static void cpuidle_idle_call(unsigned int latency_req)
> {
> struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> @@ -107,7 +108,7 @@ static void cpuidle_idle_call(void)
> * Ask the cpuidle framework to choose a convenient idle state.
> * Fall back to the default arch idle method on errors.
> */
> - next_state = cpuidle_select(drv, dev);
> + next_state = cpuidle_select(drv, dev, latency_req);
> if (next_state < 0) {
> use_default:
> /*
> @@ -182,6 +183,8 @@ exit_idle:
> */
> static void cpu_idle_loop(void)
> {
> + unsigned int latency_req;
> +
> while (1) {
> /*
> * If the arch has a polling bit, we maintain an invariant:
> @@ -205,19 +208,26 @@ static void cpu_idle_loop(void)
> local_irq_disable();
> arch_cpu_idle_enter();
>
> + latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> +
> /*
> * In poll mode we reenable interrupts and spin.
> *
> + * If the latency req is zero, we don't want to
> + * enter any idle state and we jump to the poll
> + * function directly
> + *
> * Also if we detected in the wakeup from idle
> * path that the tick broadcast device expired
> * for us, we don't want to go deep idle as we
> * know that the IPI is going to arrive right
> * away
> */
> - if (cpu_idle_force_poll || tick_check_broadcast_expired())
> + if (!latency_req || cpu_idle_force_poll ||
> + tick_check_broadcast_expired())
> cpu_idle_poll();
> else
> - cpuidle_idle_call();
> + cpuidle_idle_call(latency_req);
>
> arch_cpu_idle_exit();
> }
> --
> 1.9.1
>
>

2014-10-22 20:38:16

by Nicolas Pitre

Subject: Re: [PATCH 2/5] sched: idle: Get the next timer event and pass it to the cpuidle framework

On Mon, 20 Oct 2014, Daniel Lezcano wrote:

> Following the logic of the previous patch, retrieve from the idle task the
> expected timer sleep duration and pass it to the cpuidle framework.
>
> Take the opportunity to remove the unused headers in the menu.c file.
>
> This patch does not change the current behavior.
>
> Signed-off-by: Daniel Lezcano <[email protected]>

One minor nit below.

> @@ -211,6 +212,12 @@ static void cpu_idle_loop(void)
> latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
>
> /*
> + * The next timer event in us
> + */

This 3-line comment is redundant. The code is obvious enough on its own.

> + next_timer_event = ktime_to_us(
> + tick_nohz_get_sleep_length());

I'd suggest this form for better readability:

next_timer_event =
ktime_to_us(tick_nohz_get_sleep_length());

Other than that...

Acked-by: Nicolas Pitre <[email protected]>

> +
> + /*
> * In poll mode we reenable interrupts and spin.
> *
> * If the latency req is zero, we don't want to
> @@ -227,7 +234,8 @@ static void cpu_idle_loop(void)
> tick_check_broadcast_expired())
> cpu_idle_poll();
> else
> - cpuidle_idle_call(latency_req);
> + cpuidle_idle_call(latency_req,
> + next_timer_event);
>
> arch_cpu_idle_exit();
> }
> --
> 1.9.1
>
>

2014-10-22 20:41:42

by Nicolas Pitre

Subject: Re: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

On Mon, 20 Oct 2014, Daniel Lezcano wrote:

> In the current code, the check to reflect or not the outcoming state is done
> against the idle state which has been choose and its value.

s/choose/chosen/

> Instead of doing a check in each of the reflect functions, just don't call reflect
> if something went wrong in the idle path.
>
> Signed-off-by: Daniel Lezcano <[email protected]>

Acked-by: Nicolas Pitre <[email protected]>

> ---
> drivers/cpuidle/governors/ladder.c | 3 +--
> drivers/cpuidle/governors/menu.c | 4 +---
> kernel/sched/idle.c | 3 ++-
> 3 files changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
> index fb396d6..c0b36a8 100644
> --- a/drivers/cpuidle/governors/ladder.c
> +++ b/drivers/cpuidle/governors/ladder.c
> @@ -165,8 +165,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
> static void ladder_reflect(struct cpuidle_device *dev, int index)
> {
> struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
> - if (index > 0)
> - ldev->last_state_idx = index;
> + ldev->last_state_idx = index;
> }
>
> static struct cpuidle_governor ladder_governor = {
> diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
> index a17515f..3907301 100644
> --- a/drivers/cpuidle/governors/menu.c
> +++ b/drivers/cpuidle/governors/menu.c
> @@ -365,9 +365,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> static void menu_reflect(struct cpuidle_device *dev, int index)
> {
> struct menu_device *data = &__get_cpu_var(menu_devices);
> - data->last_state_idx = index;
> - if (index >= 0)
> - data->needs_update = 1;
> + data->needs_update = 1;
> }
>
> /**
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index f439161..9ac7322 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -162,7 +162,8 @@ use_default:
> /*
> * Give the governor an opportunity to reflect on the outcome
> */
> - cpuidle_reflect(dev, entered_state);
> + if (entered_state >= 0)
> + cpuidle_reflect(dev, entered_state);
>
> exit_idle:
> __current_set_polling();
> --
> 1.9.1
>
>

2014-10-24 13:24:35

by Peter Zijlstra

Subject: Re: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

On Mon, Oct 20, 2014 at 06:25:41PM +0200, Daniel Lezcano wrote:
> - if (index > 0)
> - if (index >= 0)

That's not the same condition.

2014-10-24 13:32:58

by Daniel Lezcano

Subject: Re: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

On 10/24/2014 03:24 PM, Peter Zijlstra wrote:
> On Mon, Oct 20, 2014 at 06:25:41PM +0200, Daniel Lezcano wrote:
>> - if (index > 0)
>> - if (index >= 0)
>
> That's not the same condition.

Yes, and it is wrong. That is the result of the
CPUIDLE_DRIVER_STATE_START dance.

The ladder governor avoids using the POLL state as if it were running
on x86. But on, e.g., ARM, we will never reflect state 0 because
CPUIDLE_DRIVER_STATE_START is equal to zero on all non-x86 platforms.

If I am not wrong, the ladder select function will never choose state
0 on x86, so it will never reflect state 0 (after applying patch 1/5).
For the other architectures it will reflect state 0 as it should.
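
For reference, the "dance" boils down to CPUIDLE_DRIVER_STATE_START being 1
only when the architecture provides the poll loop at index 0 (x86, via
CONFIG_ARCH_HAS_CPU_RELAX) and 0 everywhere else. A tiny stand-alone sketch
of that conditional (not the kernel header itself):

#include <stdio.h>

#ifdef CONFIG_ARCH_HAS_CPU_RELAX                /* set on x86 */
#define CPUIDLE_DRIVER_STATE_START      1       /* index 0 is the poll state */
#else
#define CPUIDLE_DRIVER_STATE_START      0       /* index 0 is a real state */
#endif

int main(void)
{
        printf("first real idle state index: %d\n",
               CPUIDLE_DRIVER_STATE_START);
        return 0;
}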


2014-10-24 13:38:16

by Peter Zijlstra

Subject: Re: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

On Mon, Oct 20, 2014 at 06:25:41PM +0200, Daniel Lezcano wrote:
> +++ b/kernel/sched/idle.c
> @@ -162,7 +162,8 @@ use_default:
> /*
> * Give the governor an opportunity to reflect on the outcome
> */
> - cpuidle_reflect(dev, entered_state);
> + if (entered_state >= 0)
> + cpuidle_reflect(dev, entered_state);
>

Given we'll do use_default: when next_state < 0, we actually never get
here unless this is true.

2014-10-24 13:44:03

by Daniel Lezcano

Subject: Re: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

On 10/24/2014 03:38 PM, Peter Zijlstra wrote:
> On Mon, Oct 20, 2014 at 06:25:41PM +0200, Daniel Lezcano wrote:
>> +++ b/kernel/sched/idle.c
>> @@ -162,7 +162,8 @@ use_default:
>> /*
>> * Give the governor an opportunity to reflect on the outcome
>> */
>> - cpuidle_reflect(dev, entered_state);
>> + if (entered_state >= 0)
>> + cpuidle_reflect(dev, entered_state);
>>
>
> Given we'll do use_default: when next_state < 0, we actually never get
> here unless this is true.

There is the 'cpuidle_enter' call in between which may fail.




2014-10-24 13:56:13

by Peter Zijlstra

Subject: Re: [PATCH 3/5] cpuidle: idle: menu: Don't reflect when a state selection failed

On Fri, Oct 24, 2014 at 03:43:53PM +0200, Daniel Lezcano wrote:
> On 10/24/2014 03:38 PM, Peter Zijlstra wrote:
> >On Mon, Oct 20, 2014 at 06:25:41PM +0200, Daniel Lezcano wrote:
> >>+++ b/kernel/sched/idle.c
> >>@@ -162,7 +162,8 @@ use_default:
> >> /*
> >> * Give the governor an opportunity to reflect on the outcome
> >> */
> >>- cpuidle_reflect(dev, entered_state);
> >>+ if (entered_state >= 0)
> >>+ cpuidle_reflect(dev, entered_state);
> >>
> >
> >Given we'll do use_default: when next_state < 0, we actually never get
> >here unless this is true.
>
> There is the 'cpuidle_enter' call in between which may fail.

Hmm, indeed there is. I had not expected that one to fail like this.

2014-10-27 22:38:25

by Rafael J. Wysocki

Subject: Re: [PATCH 1/5] sched: idle: cpuidle: Check the latency req before idle

On Monday, October 20, 2014 06:25:39 PM Daniel Lezcano wrote:
> When the pmqos latency requirement is set to zero that means "poll in all the
> cases".
>
> That is correctly implemented on x86 but not on the other archs.
>
> As how is written the code, if the latency request is zero, the governor will
> return zero, so corresponding, for x86, to the poll function, but for the
> others arch the default idle function. For example, on ARM this is wait-for-
> interrupt with a latency of '1', so violating the constraint.
>
> In order to fix that, do the latency requirement check *before* calling the
> cpuidle framework in order to jump to the poll function without entering
> cpuidle. That has several benefits:
>
> 1. It clarifies and unifies the code
> 2. It fixes x86 vs other archs behavior
> 3. Factors out the call to the same function
> 4. Prevent to enter the cpuidle framework with its expensive cost in
> calculation
>
> As the latency_req is needed in all the cases, change the select API to take
> the latency_req as parameter in case it is not equal to zero.
>
> As a positive side effect, it introduces the latency constraint specified
> externally, so one more step to the cpuidle/scheduler integration.
>
> Signed-off-by: Daniel Lezcano <[email protected]>

I've discussed this series with Len and the patches look good to us.

I can apply them unless Peter prefers to take them through sched. In which
case Peter please add my ACK to them.


> ---
> drivers/cpuidle/cpuidle.c | 5 +++--
> drivers/cpuidle/governors/ladder.c | 9 +--------
> drivers/cpuidle/governors/menu.c | 8 ++------
> include/linux/cpuidle.h | 7 ++++---
> kernel/sched/idle.c | 18 ++++++++++++++----
> 5 files changed, 24 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index ee9df5e..372c36f 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -158,7 +158,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> *
> * Returns the index of the idle state.
> */
> -int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> + int latency_req)
> {
> if (off || !initialized)
> return -ENODEV;
> @@ -169,7 +170,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> if (unlikely(use_deepest_state))
> return cpuidle_find_deepest_state(drv, dev);
>
> - return cpuidle_curr_governor->select(drv, dev);
> + return cpuidle_curr_governor->select(drv, dev, latency_req);
> }
>
> /**
> diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
> index 044ee0d..18f0da9 100644
> --- a/drivers/cpuidle/governors/ladder.c
> +++ b/drivers/cpuidle/governors/ladder.c
> @@ -64,18 +64,11 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
> * @dev: the CPU
> */
> static int ladder_select_state(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> + struct cpuidle_device *dev, int latency_req)
> {
> struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
> struct ladder_device_state *last_state;
> int last_residency, last_idx = ldev->last_state_idx;
> - int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> -
> - /* Special case when user has set very strict latency requirement */
> - if (unlikely(latency_req == 0)) {
> - ladder_do_selection(ldev, last_idx, 0);
> - return 0;
> - }
>
> last_state = &ldev->states[last_idx];
>
> diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
> index 34db2fb..96f8fb0 100644
> --- a/drivers/cpuidle/governors/menu.c
> +++ b/drivers/cpuidle/governors/menu.c
> @@ -287,10 +287,10 @@ again:
> * @drv: cpuidle driver containing state data
> * @dev: the CPU
> */
> -static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> + int latency_req)
> {
> struct menu_device *data = &__get_cpu_var(menu_devices);
> - int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> int i;
> unsigned int interactivity_req;
> unsigned long nr_iowaiters, cpu_load;
> @@ -302,10 +302,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
>
> data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
>
> - /* Special case when user has set very strict latency requirement */
> - if (unlikely(latency_req == 0))
> - return 0;
> -
> /* determine the expected residency time, round up */
> data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
>
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 25e0df6..fb465c1 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -122,7 +122,7 @@ struct cpuidle_driver {
> extern void disable_cpuidle(void);
>
> extern int cpuidle_select(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> + struct cpuidle_device *dev, int latency_req);
> extern int cpuidle_enter(struct cpuidle_driver *drv,
> struct cpuidle_device *dev, int index);
> extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
> @@ -150,7 +150,7 @@ extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
> #else
> static inline void disable_cpuidle(void) { }
> static inline int cpuidle_select(struct cpuidle_driver *drv,
> - struct cpuidle_device *dev)
> + struct cpuidle_device *dev, int latency_req)
> {return -ENODEV; }
> static inline int cpuidle_enter(struct cpuidle_driver *drv,
> struct cpuidle_device *dev, int index)
> @@ -205,7 +205,8 @@ struct cpuidle_governor {
> struct cpuidle_device *dev);
>
> int (*select) (struct cpuidle_driver *drv,
> - struct cpuidle_device *dev);
> + struct cpuidle_device *dev,
> + int latency_req);
> void (*reflect) (struct cpuidle_device *dev, int index);
>
> struct module *owner;
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 11e7bc4..25ba94d 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -5,6 +5,7 @@
> #include <linux/cpu.h>
> #include <linux/cpuidle.h>
> #include <linux/tick.h>
> +#include <linux/pm_qos.h>
> #include <linux/mm.h>
> #include <linux/stackprotector.h>
>
> @@ -74,7 +75,7 @@ void __weak arch_cpu_idle(void)
> * set, and it returns with polling set. If it ever stops polling, it
> * must clear the polling bit.
> */
> -static void cpuidle_idle_call(void)
> +static void cpuidle_idle_call(unsigned int latency_req)
> {
> struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> @@ -107,7 +108,7 @@ static void cpuidle_idle_call(void)
> * Ask the cpuidle framework to choose a convenient idle state.
> * Fall back to the default arch idle method on errors.
> */
> - next_state = cpuidle_select(drv, dev);
> + next_state = cpuidle_select(drv, dev, latency_req);
> if (next_state < 0) {
> use_default:
> /*
> @@ -182,6 +183,8 @@ exit_idle:
> */
> static void cpu_idle_loop(void)
> {
> + unsigned int latency_req;
> +
> while (1) {
> /*
> * If the arch has a polling bit, we maintain an invariant:
> @@ -205,19 +208,26 @@ static void cpu_idle_loop(void)
> local_irq_disable();
> arch_cpu_idle_enter();
>
> + latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
> +
> /*
> * In poll mode we reenable interrupts and spin.
> *
> + * If the latency req is zero, we don't want to
> + * enter any idle state and we jump to the poll
> + * function directly
> + *
> * Also if we detected in the wakeup from idle
> * path that the tick broadcast device expired
> * for us, we don't want to go deep idle as we
> * know that the IPI is going to arrive right
> * away
> */
> - if (cpu_idle_force_poll || tick_check_broadcast_expired())
> + if (!latency_req || cpu_idle_force_poll ||
> + tick_check_broadcast_expired())
> cpu_idle_poll();
> else
> - cpuidle_idle_call();
> + cpuidle_idle_call(latency_req);
>
> arch_cpu_idle_exit();
> }
>

--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

2014-10-28 01:50:00

by Len Brown

Subject: Re: [PATCH 1/5] sched: idle: cpuidle: Check the latency req before idle

Looks fine, Daniel.

cheers,
-Len

Reviewed-by: Len Brown <[email protected]>

2014-10-28 01:53:58

by Len Brown

Subject: Re: [PATCH 2/5] sched: idle: Get the next timer event and pass it to the cpuidle framework

Reviewed-by: Len Brown <[email protected]>

On Wed, Oct 22, 2014 at 4:38 PM, Nicolas Pitre <[email protected]> wrote:
> On Mon, 20 Oct 2014, Daniel Lezcano wrote:
>
>> Following the logic of the previous patch, retrieve from the idle task the
>> expected timer sleep duration and pass it to the cpuidle framework.
>>
>> Take the opportunity to remove the unused headers in the menu.c file.
>>
>> This patch does not change the current behavior.
>>
>> Signed-off-by: Daniel Lezcano <[email protected]>
>
> One minor nit below.
>
>> @@ -211,6 +212,12 @@ static void cpu_idle_loop(void)
>> latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
>>
>> /*
>> + * The next timer event in us
>> + */
>
> This 3-line comment is redundant. The code is obvious enough on its own.
>
>> + next_timer_event = ktime_to_us(
>> + tick_nohz_get_sleep_length());
>
> I'd suggest this form for better readability:
>
> next_timer_event =
> ktime_to_us(tick_nohz_get_sleep_length());
>
> Other than that...
>
> Acked-by: Nicolas Pitre <[email protected]>
>
>> +
>> + /*
>> * In poll mode we reenable interrupts and spin.
>> *
>> * If the latency req is zero, we don't want to
>> @@ -227,7 +234,8 @@ static void cpu_idle_loop(void)
>> tick_check_broadcast_expired())
>> cpu_idle_poll();
>> else
>> - cpuidle_idle_call(latency_req);
>> + cpuidle_idle_call(latency_req,
>> + next_timer_event);
>>
>> arch_cpu_idle_exit();
>> }
>> --
>> 1.9.1
>>
>>



--
Len Brown, Intel Open Source Technology Center

2014-10-28 06:40:11

by Preeti Murthy

Subject: Re: [PATCH 2/5] sched: idle: Get the next timer event and pass it to the cpuidle framework

Hi Daniel,

On Mon, Oct 20, 2014 at 9:55 PM, Daniel Lezcano
<[email protected]> wrote:
> Following the logic of the previous patch, retrieve from the idle task the
> expected timer sleep duration and pass it to the cpuidle framework.
>
> Take the opportunity to remove the unused headers in the menu.c file.
>
> This patch does not change the current behavior.
>
> Signed-off-by: Daniel Lezcano <[email protected]>
> ---
> drivers/cpuidle/cpuidle.c | 11 +++++------
> drivers/cpuidle/governors/ladder.c | 3 ++-
> drivers/cpuidle/governors/menu.c | 8 ++------
> include/linux/cpuidle.h | 8 +++++---
> kernel/sched/idle.c | 16 ++++++++++++----
> 5 files changed, 26 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 372c36f..64f5800 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -8,16 +8,12 @@
> * This code is licenced under the GPL.
> */
>
> -#include <linux/clockchips.h>
> #include <linux/kernel.h>
> #include <linux/mutex.h>
> -#include <linux/sched.h>
> #include <linux/notifier.h>
> #include <linux/pm_qos.h>
> #include <linux/cpu.h>
> #include <linux/cpuidle.h>
> -#include <linux/ktime.h>
> -#include <linux/hrtimer.h>
> #include <linux/module.h>
> #include <trace/events/power.h>
>
> @@ -155,11 +151,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> *
> * @drv: the cpuidle driver
> * @dev: the cpuidle device
> + * @latency_req: the latency constraint when choosing an idle state

You might want to include this change in the previous patch itself.

> + * @next_timer_event: the duration until the timer expires
> *
> * Returns the index of the idle state.
> */

Regards
Preeti U Murthy

2014-10-28 08:30:54

by Peter Zijlstra

Subject: Re: [PATCH 1/5] sched: idle: cpuidle: Check the latency req before idle

On Mon, Oct 27, 2014 at 11:58:57PM +0100, Rafael J. Wysocki wrote:
> I can apply them unless Peter prefers to take them through sched. In which
> case Peter please add my ACK to them.

This is mostly cpuidle bits, feel free to take them.

Acked-by: Peter Zijlstra (Intel) <[email protected]>

2014-10-28 18:02:07

by Daniel Lezcano

Subject: Re: [PATCH 2/5] sched: idle: Get the next timer event and pass it to the cpuidle framework

On 10/28/2014 07:40 AM, Preeti Murthy wrote:
> Hi Daniel,
>
> On Mon, Oct 20, 2014 at 9:55 PM, Daniel Lezcano
> <[email protected]> wrote:
>> Following the logic of the previous patch, retrieve from the idle task the
>> expected timer sleep duration and pass it to the cpuidle framework.
>>
>> Take the opportunity to remove the unused headers in the menu.c file.
>>
>> This patch does not change the current behavior.
>>
>> Signed-off-by: Daniel Lezcano <[email protected]>
>> ---
>> drivers/cpuidle/cpuidle.c | 11 +++++------
>> drivers/cpuidle/governors/ladder.c | 3 ++-
>> drivers/cpuidle/governors/menu.c | 8 ++------
>> include/linux/cpuidle.h | 8 +++++---
>> kernel/sched/idle.c | 16 ++++++++++++----
>> 5 files changed, 26 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>> index 372c36f..64f5800 100644
>> --- a/drivers/cpuidle/cpuidle.c
>> +++ b/drivers/cpuidle/cpuidle.c
>> @@ -8,16 +8,12 @@
>> * This code is licenced under the GPL.
>> */
>>
>> -#include <linux/clockchips.h>
>> #include <linux/kernel.h>
>> #include <linux/mutex.h>
>> -#include <linux/sched.h>
>> #include <linux/notifier.h>
>> #include <linux/pm_qos.h>
>> #include <linux/cpu.h>
>> #include <linux/cpuidle.h>
>> -#include <linux/ktime.h>
>> -#include <linux/hrtimer.h>
>> #include <linux/module.h>
>> #include <trace/events/power.h>
>>
>> @@ -155,11 +151,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>> *
>> * @drv: the cpuidle driver
>> * @dev: the cpuidle device
>> + * @latency_req: the latency constraint when choosing an idle state
>
> You might want to include this change in the previous patch itself.

Ah, yes. Thanks.

>> + * @next_timer_event: the duration until the timer expires
>> *
>> * Returns the index of the idle state.
>> */
>
> Regards
> Preeti U Murthy
>

