Hey,
Presented herewith a series with aims to tie in together the haltpoll
idle driver and governor, without sacrificing previous governor setups.
In addition, there are a few fixes with respect to module loading for
cpuidle-haltpoll.
The series is organized as follows:
Patch 1: Allows idle driver stating a preferred governor that it
wants to use, based on discussion here:
https://lore.kernel.org/kvm/[email protected]/
Patch 2: Decrease rating of governor, and allows previous defaults
to be as before haltpoll, while using @governor to switch to haltpoll
when haltpoll driver is registered;
Patch 3 - 4: Module loading fixes. first is the incorrect error
reporting and second is supportting module unloading.
Thanks,
Joao
v3:
* Fixed ARM build issues.
v2:
* Add missing Fixes tag on patches 3 and 4.
Joao Martins (4):
cpuidle: allow governor switch on cpuidle_register_driver()
cpuidle-haltpoll: set haltpoll as preferred governor
cpuidle-haltpoll: return -ENODEV on modinit failure
cpuidle-haltpoll: do not set an owner to allow modunload
drivers/cpuidle/cpuidle-haltpoll.c | 4 ++--
drivers/cpuidle/cpuidle.h | 2 ++
drivers/cpuidle/driver.c | 25 +++++++++++++++++++++++++
drivers/cpuidle/governor.c | 7 ++++---
drivers/cpuidle/governors/haltpoll.c | 2 +-
include/linux/cpuidle.h | 3 +++
6 files changed, 37 insertions(+), 6 deletions(-)
--
2.17.1
The recently introduced haltpoll driver is largely only useful with
haltpoll governor. To allow drivers to associate with a particular idle
behaviour, add a @governor property to 'struct cpuidle_driver' and thus
allow a cpuidle driver to switch to a *preferred* governor on idle driver
registration. We save the previous governor, and when an idle driver is
unregistered we switch back to that.
The @governor can be overridden by cpuidle.governor= boot param or
alternatively be ignored if the governor doesn't exist.
Signed-off-by: Joao Martins <[email protected]>
---
v3:
* Fix kbuild test robot arm issues with CONFIG_CPU_IDLE_MULTIPLE_DRIVERS=y
Specifically, moved @cpuidle_prev_governor to a more generic place
as this is not really tied to !CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
---
drivers/cpuidle/cpuidle.h | 2 ++
drivers/cpuidle/driver.c | 25 +++++++++++++++++++++++++
drivers/cpuidle/governor.c | 7 ++++---
include/linux/cpuidle.h | 3 +++
4 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index d6613101af92..9f336af17fa6 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -9,6 +9,7 @@
/* For internal use only */
extern char param_governor[];
extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct cpuidle_governor *cpuidle_prev_governor;
extern struct list_head cpuidle_governors;
extern struct list_head cpuidle_detected_devices;
extern struct mutex cpuidle_lock;
@@ -22,6 +23,7 @@ extern void cpuidle_install_idle_handler(void);
extern void cpuidle_uninstall_idle_handler(void);
/* governors */
+extern struct cpuidle_governor *cpuidle_find_governor(const char *str);
extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
/* sysfs */
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index dc32f34e68d9..80c1a830d991 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -254,12 +254,25 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
*/
int cpuidle_register_driver(struct cpuidle_driver *drv)
{
+ struct cpuidle_governor *gov;
int ret;
spin_lock(&cpuidle_driver_lock);
ret = __cpuidle_register_driver(drv);
spin_unlock(&cpuidle_driver_lock);
+ if (!ret && !strlen(param_governor) && drv->governor &&
+ (cpuidle_get_driver() == drv)) {
+ mutex_lock(&cpuidle_lock);
+ gov = cpuidle_find_governor(drv->governor);
+ if (gov) {
+ cpuidle_prev_governor = cpuidle_curr_governor;
+ if (cpuidle_switch_governor(gov) < 0)
+ cpuidle_prev_governor = NULL;
+ }
+ mutex_unlock(&cpuidle_lock);
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register_driver);
@@ -274,9 +287,21 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver);
*/
void cpuidle_unregister_driver(struct cpuidle_driver *drv)
{
+ bool enabled = (cpuidle_get_driver() == drv);
+
spin_lock(&cpuidle_driver_lock);
__cpuidle_unregister_driver(drv);
spin_unlock(&cpuidle_driver_lock);
+
+ if (!enabled)
+ return;
+
+ mutex_lock(&cpuidle_lock);
+ if (cpuidle_prev_governor) {
+ if (!cpuidle_switch_governor(cpuidle_prev_governor))
+ cpuidle_prev_governor = NULL;
+ }
+ mutex_unlock(&cpuidle_lock);
}
EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 2e3e14192bee..e9801f26c732 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -20,14 +20,15 @@ char param_governor[CPUIDLE_NAME_LEN];
LIST_HEAD(cpuidle_governors);
struct cpuidle_governor *cpuidle_curr_governor;
+struct cpuidle_governor *cpuidle_prev_governor;
/**
- * __cpuidle_find_governor - finds a governor of the specified name
+ * cpuidle_find_governor - finds a governor of the specified name
* @str: the name
*
* Must be called with cpuidle_lock acquired.
*/
-static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+struct cpuidle_governor *cpuidle_find_governor(const char *str)
{
struct cpuidle_governor *gov;
@@ -87,7 +88,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
return -ENODEV;
mutex_lock(&cpuidle_lock);
- if (__cpuidle_find_governor(gov->name) == NULL) {
+ if (cpuidle_find_governor(gov->name) == NULL) {
ret = 0;
list_add_tail(&gov->governor_list, &cpuidle_governors);
if (!cpuidle_curr_governor ||
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 1a9f54eb3aa1..2dc4c6b19c25 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -121,6 +121,9 @@ struct cpuidle_driver {
/* the driver handles the cpus in cpumask */
struct cpumask *cpumask;
+
+ /* preferred governor to switch at register time */
+ const char *governor;
};
#ifdef CONFIG_CPU_IDLE
--
2.17.1
Right now, guest current governors have the following ratings:
* ladder -> 10
* teo -> 19
* menu -> 20
* haltpoll -> 21
* ladder + nohz=off -> 25
haltpoll governor got introduced and it is now the default governor given
its highest rating -- with ladder+nohz being the exception -- regardless of
idle driver in the guest. An example of an undesirable case is x86 KVM
guests with MWAIT which have intel_idle registered first, and consequently
will have haltpoll be used as governor which would get limited to a poll
state and state 1 and the other states wouldn't get used.
To keep the previous defaults we decrease rating of governor to 9 (below
current lowest rating) and thus rely on @governor switch on
cpuidle_register_driver() to tie in haltpoll idle driver and governor
together.
Signed-off-by: Joao Martins <[email protected]>
---
drivers/cpuidle/cpuidle-haltpoll.c | 1 +
drivers/cpuidle/governors/haltpoll.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 56d8ab814466..519e90d125cf 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -34,6 +34,7 @@ static int default_enter_idle(struct cpuidle_device *dev,
static struct cpuidle_driver haltpoll_driver = {
.name = "haltpoll",
+ .governor = "haltpoll",
.owner = THIS_MODULE,
.states = {
{ /* entry 0 is for polling */ },
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 797477bda486..7a703d2e0064 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -133,7 +133,7 @@ static int haltpoll_enable_device(struct cpuidle_driver *drv,
static struct cpuidle_governor haltpoll_governor = {
.name = "haltpoll",
- .rating = 21,
+ .rating = 9,
.enable = haltpoll_enable_device,
.select = haltpoll_select,
.reflect = haltpoll_reflect,
--
2.17.1
When a user loads cpuidle-haltpoll on a non KVM guest the module will
successfully load, even though idle driver registration didn't take
place.
We should instead return -ENODEV signaling the user that the driver can't
be loaded, like other error paths in haltpoll_init(). An example of such
error paths is when we return -EBUSY when attempting to register an idle
driver when it had one already (e.g. intel_idle loads at boot and then we
attempt to insert module cpuidle-haltpoll).
Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver")
Signed-off-by: Joao Martins <[email protected]>
---
v2:
* Added Fixes tag
---
drivers/cpuidle/cpuidle-haltpoll.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 519e90d125cf..7a0239ef717e 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -99,7 +99,7 @@ static int __init haltpoll_init(void)
cpuidle_poll_state_init(drv);
if (!kvm_para_available())
- return 0;
+ return -ENODEV;
ret = cpuidle_register_driver(drv);
if (ret < 0)
--
2.17.1