The following patch series ports the cpuidle framework to the powernv
platform and also implements a cpuidle back-end powernv idle driver
that calls into the power7_nap and snooze idle states.
Moving the idle states over to cpuidle framework can take advantage
of advanced heuristics, tunables and features provided by cpuidle
framework. Additional idle states can be exploited using the cpuidle
framework. The statistics and tracing infrastructure provided by
the cpuidle framework also helps in enabling power management
related tools and helps tune the system and applications.
This series aims to maintain compatibility and functionality with the
existing powernv idle cpu management code. There are no new functions
or idle states added as part of this series. This can be extended by
adding more states to this existing framework.
With this patch series the powernv cpuidle functionalities
are on-par with pSeries idle management.
For the POWERNV platform to hook into the CPUIDLE framework, one
needs to enable CONFIG_POWERNV_IDLE and disable
CONFIG_PSERIES_IDLE.
Deepthi Dharwar (3):
cpuidle/powernv: cpuidle backend driver for powernv
cpuidle/powernv: Enable idle powernv cpu to call into the cpuidle framework.
cpuidle/powernv: Support smt-snooze-delay parameter in powernv idle.
arch/powerpc/include/asm/processor.h | 2
arch/powerpc/platforms/powernv/Kconfig | 9 +
arch/powerpc/platforms/powernv/Makefile | 1
arch/powerpc/platforms/powernv/powernv.h | 3
arch/powerpc/platforms/powernv/processor_idle.c | 275 +++++++++++++++++++++++
arch/powerpc/platforms/powernv/setup.c | 12 +
6 files changed, 300 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/platforms/powernv/processor_idle.c
-- Deepthi
This patch implements a back-end cpuidle driver for
powernv calling power7_nap and snooze idle states.
This can be extended by adding more idle states
in the future to the existing framework.
Signed-off-by: Deepthi Dharwar <[email protected]>
---
arch/powerpc/platforms/powernv/Kconfig | 9 +
arch/powerpc/platforms/powernv/Makefile | 1
arch/powerpc/platforms/powernv/processor_idle.c | 239 +++++++++++++++++++++++
3 files changed, 249 insertions(+)
create mode 100644 arch/powerpc/platforms/powernv/processor_idle.c
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index c24684c..ace2d22 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -20,3 +20,12 @@ config PPC_POWERNV_RTAS
default y
select PPC_ICS_RTAS
select PPC_RTAS
+
+config POWERNV_IDLE
+ bool "CPUIdle driver for powernv platform"
+ depends on CPU_IDLE
+ depends on PPC_POWERNV
+ default y
+ help
+ Select this option to enable processor idle state management
+ through cpuidle subsystem.
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 7fe5951..c0e44eb 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,3 +4,4 @@ obj-y += opal-rtc.o opal-nvram.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_POWERNV_IDLE) += processor_idle.o
diff --git a/arch/powerpc/platforms/powernv/processor_idle.c b/arch/powerpc/platforms/powernv/processor_idle.c
new file mode 100644
index 0000000..f43ad91a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/processor_idle.c
@@ -0,0 +1,239 @@
+/*
+ * processor_idle - idle state cpuidle driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/runlatch.h>
+
+struct cpuidle_driver powernv_idle_driver = {
+ .name = "powernv_idle",
+ .owner = THIS_MODULE,
+};
+
+#define MAX_IDLE_STATE_COUNT 2
+
+static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
+static struct cpuidle_device __percpu *powernv_cpuidle_devices;
+static struct cpuidle_state *cpuidle_state_table;
+
+static int snooze_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ int cpu = dev->cpu;
+
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while ((!need_resched()) && cpu_online(cpu)) {
+ ppc64_runlatch_off();
+ HMT_very_low();
+ }
+
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+ return index;
+}
+
+
+static int nap_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ ppc64_runlatch_off();
+ power7_idle();
+ return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state powernv_states[MAX_IDLE_STATE_COUNT] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &snooze_loop },
+ { /* Nap */
+ .name = "Nap",
+ .desc = "Nap",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 100,
+ .enter = &nap_loop },
+};
+
+static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
+ unsigned long action, void *hcpu)
+{
+ int hotcpu = (unsigned long)hcpu;
+ struct cpuidle_device *dev =
+ per_cpu_ptr(powernv_cpuidle_devices, hotcpu);
+
+ if (dev && cpuidle_get_driver()) {
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ cpuidle_pause_and_lock();
+ cpuidle_enable_device(dev);
+ cpuidle_resume_and_unlock();
+ break;
+
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ cpuidle_pause_and_lock();
+ cpuidle_disable_device(dev);
+ cpuidle_resume_and_unlock();
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block setup_hotplug_notifier = {
+ .notifier_call = powernv_cpuidle_add_cpu_notifier,
+};
+
+/*
+ * powernv_cpuidle_driver_init()
+ */
+static int powernv_cpuidle_driver_init(void)
+{
+ int idle_state;
+ struct cpuidle_driver *drv = &powernv_idle_driver;
+
+ drv->state_count = 0;
+
+ for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
+
+ if (idle_state > max_idle_state)
+ break;
+
+ /* is the state not enabled? */
+ if (cpuidle_state_table[idle_state].enter == NULL)
+ continue;
+
+ drv->states[drv->state_count] = /* structure copy */
+ cpuidle_state_table[idle_state];
+
+ drv->state_count += 1;
+ }
+
+ return 0;
+}
+
+/* powernv_idle_devices_uninit(void)
+ * unregister cpuidle devices and de-allocate memory
+ */
+static void powernv_idle_devices_uninit(void)
+{
+ int i;
+ struct cpuidle_device *dev;
+
+ for_each_possible_cpu(i) {
+ dev = per_cpu_ptr(powernv_cpuidle_devices, i);
+ cpuidle_unregister_device(dev);
+ }
+
+ free_percpu(powernv_cpuidle_devices);
+ return;
+}
+
+/* powernv_idle_devices_init()
+ * allocate, initialize and register cpuidle device
+ */
+static int powernv_idle_devices_init(void)
+{
+ int i;
+ struct cpuidle_driver *drv = &powernv_idle_driver;
+ struct cpuidle_device *dev;
+
+ powernv_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (powernv_cpuidle_devices == NULL)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ dev = per_cpu_ptr(powernv_cpuidle_devices, i);
+ dev->state_count = drv->state_count;
+ dev->cpu = i;
+ if (cpuidle_register_device(dev)) {
+ printk(KERN_DEBUG \
+ "cpuidle_register_device %d failed!\n", i);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
+ * powernv_idle_probe()
+ * Choose state table for shared versus dedicated partition
+ */
+static int powernv_idle_probe(void)
+{
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return -ENODEV;
+
+ cpuidle_state_table = powernv_states;
+ return 0;
+}
+
+static int __init powernv_processor_idle_init(void)
+{
+ int retval;
+
+ retval = powernv_idle_probe();
+ if (retval)
+ return retval;
+
+ powernv_cpuidle_driver_init();
+ retval = cpuidle_register_driver(&powernv_idle_driver);
+ if (retval) {
+ printk(KERN_DEBUG "Registration of powernv driver failed.\n");
+ return retval;
+ }
+
+ retval = powernv_idle_devices_init();
+ if (retval) {
+ powernv_idle_devices_uninit();
+ cpuidle_unregister_driver(&powernv_idle_driver);
+ return retval;
+ }
+
+ register_cpu_notifier(&setup_hotplug_notifier);
+ printk(KERN_DEBUG "powernv_idle_driver registered\n");
+
+ return 0;
+}
+
+static void __exit powernv_processor_idle_exit(void)
+{
+
+ unregister_cpu_notifier(&setup_hotplug_notifier);
+ powernv_idle_devices_uninit();
+ cpuidle_unregister_driver(&powernv_idle_driver);
+
+ return;
+}
+
+module_init(powernv_processor_idle_init);
+module_exit(powernv_processor_idle_exit);
+
+MODULE_AUTHOR("Deepthi Dharwar <[email protected]>");
+MODULE_DESCRIPTION("Cpuidle driver for POWERNV");
+MODULE_LICENSE("GPL");
This patch enables an idle powernv cpu to hook into the cpuidle
framework, if available, or else call the default platform idle
code.
Signed-off-by: Deepthi Dharwar <[email protected]>
---
arch/powerpc/platforms/powernv/setup.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 84438af..97d0951 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -25,6 +25,7 @@
#include <linux/of.h>
#include <linux/interrupt.h>
#include <linux/bug.h>
+#include <linux/cpuidle.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
@@ -196,6 +197,15 @@ static int __init pnv_probe(void)
return 1;
}
+void powernv_idle(void)
+{
+ /* Hook to cpuidle framework if available, else
+ * call on default platform idle code
+ */
+ if (cpuidle_idle_call())
+ power7_idle();
+}
+
define_machine(powernv) {
.name = "PowerNV",
.probe = pnv_probe,
@@ -205,7 +215,7 @@ define_machine(powernv) {
.show_cpuinfo = pnv_show_cpuinfo,
.progress = pnv_progress,
.machine_shutdown = pnv_shutdown,
- .power_save = power7_idle,
+ .power_save = powernv_idle,
.calibrate_decr = generic_calibrate_decr,
#ifdef CONFIG_KEXEC
.kexec_cpu_down = pnv_kexec_cpu_down,
smt-snooze-delay is a tunable that is supported on
the powerpc platform to delay the entry to the nap state.
This can be set either via sysfs, the kernel command line
or the ppc64_cpu utility.
Signed-off-by: Deepthi Dharwar <[email protected]>
---
arch/powerpc/include/asm/processor.h | 2 +
arch/powerpc/platforms/powernv/powernv.h | 3 ++
arch/powerpc/platforms/powernv/processor_idle.c | 36 +++++++++++++++++++++++
3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 47a35b0..5700c3c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -426,7 +426,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern void power7_nap(void);
-#ifdef CONFIG_PSERIES_IDLE
+#if defined(CONFIG_PSERIES_IDLE) || defined(CONFIG_POWERNV_IDLE)
extern void update_smt_snooze_delay(int cpu, int residency);
#else
static inline void update_smt_snooze_delay(int cpu, int residency) {}
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index a1c6f83..558ee69 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -15,4 +15,7 @@ static inline void pnv_pci_init(void) { }
static inline void pnv_pci_shutdown(void) { }
#endif
+/* Idle variable */
+DECLARE_PER_CPU(long, smt_snooze_delay);
+
#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/processor_idle.c b/arch/powerpc/platforms/powernv/processor_idle.c
index f43ad91a..505fea4 100644
--- a/arch/powerpc/platforms/powernv/processor_idle.c
+++ b/arch/powerpc/platforms/powernv/processor_idle.c
@@ -13,6 +13,8 @@
#include <asm/machdep.h>
#include <asm/runlatch.h>
+#include "powernv.h"
+
struct cpuidle_driver powernv_idle_driver = {
.name = "powernv_idle",
.owner = THIS_MODULE,
@@ -193,6 +195,38 @@ static int powernv_idle_probe(void)
return 0;
}
+void update_smt_snooze_delay(int cpu, int residency)
+{
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ struct cpuidle_device *dev;
+
+ if (!drv)
+ return;
+
+ if (cpu == -1) {
+ if (residency < 0) {
+ /* Disable NAP on all cpus */
+ drv->states[1].disabled = true;
+ } else {
+ drv->states[1].target_residency = residency;
+ drv->states[1].disabled = false;
+ }
+ return;
+ }
+
+ dev = per_cpu(cpuidle_devices, cpu);
+ if (!dev)
+ return;
+
+ if (residency < 0)
+ dev->states_usage[1].disable = 1;
+ else {
+ drv->states[1].target_residency = residency;
+ drv->states[1].disabled = false;
+ dev->states_usage[1].disable = 0;
+ }
+}
+
static int __init powernv_processor_idle_init(void)
{
int retval;
@@ -208,6 +242,8 @@ static int __init powernv_processor_idle_init(void)
return retval;
}
+ update_smt_snooze_delay(-1, per_cpu(smt_snooze_delay, 0));
+
retval = powernv_idle_devices_init();
if (retval) {
powernv_idle_devices_uninit();
On Tue, Jul 23, 2013 at 02:31:41PM +0530, Deepthi Dharwar wrote:
> This patch implements a back-end cpuidle driver for
> powernv calling power7_nap and snooze idle states.
> This can be extended by adding more idle states
> in the future to the existing framework.
Other than the state table and a few minor details this looks almost
identical to the pseries driver. Can we not have a single version in
sysdev and isolate just the differences?
cheers
On 07/23/2013 07:36 PM, Michael Ellerman wrote:
> On Tue, Jul 23, 2013 at 02:31:41PM +0530, Deepthi Dharwar wrote:
>> This patch implements a back-end cpuidle driver for
>> powernv calling power7_nap and snooze idle states.
>> This can be extended by adding more idle states
>> in the future to the existing framework.
>
> Other than the state table and a few minor details this looks almost
> identical to the pseries driver. Can we not have a single version in
> sysdev and isolate just the differences?
>
Hi Michael,
Yes, I was actually looking at consolidating and moving all the powerpc
cpuidle driver code to drivers/cpuidle/. sysdev also seems fine. Let me
rework this to combine the drivers and have a single version of the code in
sysdev for both pseries and powernv platforms.
Thanks !
Deepthi
> cheers
>
On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
> This patch implements a back-end cpuidle driver for
> powernv calling power7_nap and snooze idle states.
> This can be extended by adding more idle states
> in the future to the existing framework.
>
> Signed-off-by: Deepthi Dharwar <[email protected]>
> ---
> arch/powerpc/platforms/powernv/Kconfig | 9 +
> arch/powerpc/platforms/powernv/Makefile | 1
> arch/powerpc/platforms/powernv/processor_idle.c | 239 +++++++++++++++++++++++
> 3 files changed, 249 insertions(+)
> create mode 100644 arch/powerpc/platforms/powernv/processor_idle.c
>
> diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
> index c24684c..ace2d22 100644
> --- a/arch/powerpc/platforms/powernv/Kconfig
> +++ b/arch/powerpc/platforms/powernv/Kconfig
> @@ -20,3 +20,12 @@ config PPC_POWERNV_RTAS
> default y
> select PPC_ICS_RTAS
> select PPC_RTAS
> +
> +config POWERNV_IDLE
> + bool "CPUIdle driver for powernv platform"
> + depends on CPU_IDLE
> + depends on PPC_POWERNV
> + default y
> + help
> + Select this option to enable processor idle state management
> + through cpuidle subsystem.
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index 7fe5951..c0e44eb 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -4,3 +4,4 @@ obj-y += opal-rtc.o opal-nvram.o
> obj-$(CONFIG_SMP) += smp.o
> obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
> obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
> +obj-$(CONFIG_POWERNV_IDLE) += processor_idle.o
> diff --git a/arch/powerpc/platforms/powernv/processor_idle.c b/arch/powerpc/platforms/powernv/processor_idle.c
> new file mode 100644
> index 0000000..f43ad91a
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/processor_idle.c
> @@ -0,0 +1,239 @@
> +/*
> + * processor_idle - idle state cpuidle driver.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/moduleparam.h>
> +#include <linux/cpuidle.h>
> +#include <linux/cpu.h>
> +#include <linux/notifier.h>
> +
> +#include <asm/machdep.h>
> +#include <asm/runlatch.h>
> +
> +struct cpuidle_driver powernv_idle_driver = {
> + .name = "powernv_idle",
> + .owner = THIS_MODULE,
> +};
> +
> +#define MAX_IDLE_STATE_COUNT 2
> +
> +static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
> +static struct cpuidle_device __percpu *powernv_cpuidle_devices;
> +static struct cpuidle_state *cpuidle_state_table;
> +
> +static int snooze_loop(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv,
> + int index)
> +{
> + int cpu = dev->cpu;
> +
> + local_irq_enable();
> + set_thread_flag(TIF_POLLING_NRFLAG);
> +
> + while ((!need_resched()) && cpu_online(cpu)) {
> + ppc64_runlatch_off();
> + HMT_very_low();
> + }
Why are you using the cpu_online test here ?
> +
> + HMT_medium();
> + clear_thread_flag(TIF_POLLING_NRFLAG);
> + smp_mb();
> + return index;
> +}
> +
> +
> +static int nap_loop(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv,
> + int index)
> +{
> + ppc64_runlatch_off();
> + power7_idle();
> + return index;
> +}
> +
> +/*
> + * States for dedicated partition case.
> + */
> +static struct cpuidle_state powernv_states[MAX_IDLE_STATE_COUNT] = {
> + { /* Snooze */
> + .name = "snooze",
> + .desc = "snooze",
> + .flags = CPUIDLE_FLAG_TIME_VALID,
> + .exit_latency = 0,
> + .target_residency = 0,
> + .enter = &snooze_loop },
> + { /* Nap */
> + .name = "Nap",
> + .desc = "Nap",
> + .flags = CPUIDLE_FLAG_TIME_VALID,
> + .exit_latency = 10,
> + .target_residency = 100,
> + .enter = &nap_loop },
> +};
> +
> +static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
> + unsigned long action, void *hcpu)
> +{
> + int hotcpu = (unsigned long)hcpu;
> + struct cpuidle_device *dev =
> + per_cpu_ptr(powernv_cpuidle_devices, hotcpu);
> +
> + if (dev && cpuidle_get_driver()) {
> + switch (action) {
> + case CPU_ONLINE:
> + case CPU_ONLINE_FROZEN:
> + cpuidle_pause_and_lock();
> + cpuidle_enable_device(dev);
> + cpuidle_resume_and_unlock();
> + break;
> +
> + case CPU_DEAD:
> + case CPU_DEAD_FROZEN:
> + cpuidle_pause_and_lock();
> + cpuidle_disable_device(dev);
> + cpuidle_resume_and_unlock();
> + break;
> +
> + default:
> + return NOTIFY_DONE;
> + }
> + }
> + return NOTIFY_OK;
> +}
> +
> +static struct notifier_block setup_hotplug_notifier = {
> + .notifier_call = powernv_cpuidle_add_cpu_notifier,
> +};
This is duplicated code with the pseries cpuidle driver and IMHO it
should be moved to the cpuidle framework.
> +/*
> + * powernv_cpuidle_driver_init()
> + */
> +static int powernv_cpuidle_driver_init(void)
> +{
> + int idle_state;
> + struct cpuidle_driver *drv = &powernv_idle_driver;
> +
> + drv->state_count = 0;
> +
> + for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
> +
> + if (idle_state > max_idle_state)
> + break;
> +
> + /* is the state not enabled? */
> + if (cpuidle_state_table[idle_state].enter == NULL)
> + continue;
> +
> + drv->states[drv->state_count] = /* structure copy */
> + cpuidle_state_table[idle_state];
> +
> + drv->state_count += 1;
> + }
> +
> + return 0;
> +}
Instead of doing struct copy, why don't you use the state's 'disable'
field of the driver and then enable the state in the routine ?
> +/* powernv_idle_devices_uninit(void)
> + * unregister cpuidle devices and de-allocate memory
> + */
> +static void powernv_idle_devices_uninit(void)
> +{
> + int i;
> + struct cpuidle_device *dev;
> +
> + for_each_possible_cpu(i) {
> + dev = per_cpu_ptr(powernv_cpuidle_devices, i);
> + cpuidle_unregister_device(dev);
> + }
> +
> + free_percpu(powernv_cpuidle_devices);
> + return;
> +}
> +
> +/* powernv_idle_devices_init()
> + * allocate, initialize and register cpuidle device
> + */
> +static int powernv_idle_devices_init(void)
> +{
> + int i;
> + struct cpuidle_driver *drv = &powernv_idle_driver;
> + struct cpuidle_device *dev;
> +
> + powernv_cpuidle_devices = alloc_percpu(struct cpuidle_device);
> + if (powernv_cpuidle_devices == NULL)
> + return -ENOMEM;
> +
> + for_each_possible_cpu(i) {
> + dev = per_cpu_ptr(powernv_cpuidle_devices, i);
> + dev->state_count = drv->state_count;
> + dev->cpu = i;
> + if (cpuidle_register_device(dev)) {
> + printk(KERN_DEBUG \
> + "cpuidle_register_device %d failed!\n", i);
> + return -EIO;
> + }
> + }
> + return 0;
There is now the cpuidle_register(struct cpuidle_driver *, cpumask *);
You can get rid of the cpuidle_device struct and this init routine.
> +}
> +
> +/*
> + * powernv_idle_probe()
> + * Choose state table for shared versus dedicated partition
> + */
> +static int powernv_idle_probe(void)
> +{
> +
> + if (cpuidle_disable != IDLE_NO_OVERRIDE)
> + return -ENODEV;
> +
> + cpuidle_state_table = powernv_states;
> + return 0;
> +}
> +
> +static int __init powernv_processor_idle_init(void)
> +{
> + int retval;
> +
> + retval = powernv_idle_probe();
> + if (retval)
> + return retval;
> +
> + powernv_cpuidle_driver_init();
> + retval = cpuidle_register_driver(&powernv_idle_driver);
> + if (retval) {
> + printk(KERN_DEBUG "Registration of powernv driver failed.\n");
> + return retval;
> + }
> +
> + retval = powernv_idle_devices_init();
> + if (retval) {
> + powernv_idle_devices_uninit();
> + cpuidle_unregister_driver(&powernv_idle_driver);
> + return retval;
> + }
> +
> + register_cpu_notifier(&setup_hotplug_notifier);
> + printk(KERN_DEBUG "powernv_idle_driver registered\n");
> +
> + return 0;
> +}
> +
> +static void __exit powernv_processor_idle_exit(void)
> +{
> +
> + unregister_cpu_notifier(&setup_hotplug_notifier);
> + powernv_idle_devices_uninit();
> + cpuidle_unregister_driver(&powernv_idle_driver);
> +
> + return;
> +}
> +
> +module_init(powernv_processor_idle_init);
> +module_exit(powernv_processor_idle_exit);
> +
> +MODULE_AUTHOR("Deepthi Dharwar <[email protected]>");
> +MODULE_DESCRIPTION("Cpuidle driver for POWERNV");
> +MODULE_LICENSE("GPL");
>
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
> This patch enables idle powernv cpu to hook on to the cpuidle
> framework, if available, else call on to default idle platform
> code.
Why do you need to do that ?
> Signed-off-by: Deepthi Dharwar <[email protected]>
> ---
> arch/powerpc/platforms/powernv/setup.c | 12 +++++++++++-
> 1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
> index 84438af..97d0951 100644
> --- a/arch/powerpc/platforms/powernv/setup.c
> +++ b/arch/powerpc/platforms/powernv/setup.c
> @@ -25,6 +25,7 @@
> #include <linux/of.h>
> #include <linux/interrupt.h>
> #include <linux/bug.h>
> +#include <linux/cpuidle.h>
>
> #include <asm/machdep.h>
> #include <asm/firmware.h>
> @@ -196,6 +197,15 @@ static int __init pnv_probe(void)
> return 1;
> }
>
> +void powernv_idle(void)
> +{
> + /* Hook to cpuidle framework if available, else
> + * call on default platform idle code
> + */
> + if (cpuidle_idle_call())
> + power7_idle();
> +}
> +
> define_machine(powernv) {
> .name = "PowerNV",
> .probe = pnv_probe,
> @@ -205,7 +215,7 @@ define_machine(powernv) {
> .show_cpuinfo = pnv_show_cpuinfo,
> .progress = pnv_progress,
> .machine_shutdown = pnv_shutdown,
> - .power_save = power7_idle,
> + .power_save = powernv_idle,
> .calibrate_decr = generic_calibrate_decr,
> #ifdef CONFIG_KEXEC
> .kexec_cpu_down = pnv_kexec_cpu_down,
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On 07/27/2013 10:59 AM, Daniel Lezcano wrote:
> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>> This patch enables idle powernv cpu to hook on to the cpuidle
>> framework, if available, else call on to default idle platform
>> code.
>
> Why do you need to do that ?
>
Hi Daniel,
Well, this is needed for the case where the kernel is not compiled with
the CPU_IDLE config option.
When CPUIDLE is not configured, or if the cpuidle driver is not
registered, idle cpus still need to run some sort of default idle code
(or, in the worst case, a loop).
Regards,
Deepthi
>
>> Signed-off-by: Deepthi Dharwar <[email protected]>
>> ---
>> arch/powerpc/platforms/powernv/setup.c | 12 +++++++++++-
>> 1 file changed, 11 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
>> index 84438af..97d0951 100644
>> --- a/arch/powerpc/platforms/powernv/setup.c
>> +++ b/arch/powerpc/platforms/powernv/setup.c
>> @@ -25,6 +25,7 @@
>> #include <linux/of.h>
>> #include <linux/interrupt.h>
>> #include <linux/bug.h>
>> +#include <linux/cpuidle.h>
>>
>> #include <asm/machdep.h>
>> #include <asm/firmware.h>
>> @@ -196,6 +197,15 @@ static int __init pnv_probe(void)
>> return 1;
>> }
>>
>> +void powernv_idle(void)
>> +{
>> + /* Hook to cpuidle framework if available, else
>> + * call on default platform idle code
>> + */
>> + if (cpuidle_idle_call())
>> + power7_idle();
>> +}
>> +
>> define_machine(powernv) {
>> .name = "PowerNV",
>> .probe = pnv_probe,
>> @@ -205,7 +215,7 @@ define_machine(powernv) {
>> .show_cpuinfo = pnv_show_cpuinfo,
>> .progress = pnv_progress,
>> .machine_shutdown = pnv_shutdown,
>> - .power_save = power7_idle,
>> + .power_save = powernv_idle,
>> .calibrate_decr = generic_calibrate_decr,
>> #ifdef CONFIG_KEXEC
>> .kexec_cpu_down = pnv_kexec_cpu_down,
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/
>>
>
>
Hi Daniel,
On 07/27/2013 10:57 AM, Daniel Lezcano wrote:
> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>> This patch implements a back-end cpuidle driver for
>> powernv calling power7_nap and snooze idle states.
>> This can be extended by adding more idle states
>> in the future to the existing framework.
>>
>> Signed-off-by: Deepthi Dharwar <[email protected]>
>> ---
>> arch/powerpc/platforms/powernv/Kconfig | 9 +
>> arch/powerpc/platforms/powernv/Makefile | 1
>> arch/powerpc/platforms/powernv/processor_idle.c | 239 +++++++++++++++++++++++
>> 3 files changed, 249 insertions(+)
>> create mode 100644 arch/powerpc/platforms/powernv/processor_idle.c
>>
>> diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
>> index c24684c..ace2d22 100644
>> --- a/arch/powerpc/platforms/powernv/Kconfig
>> +++ b/arch/powerpc/platforms/powernv/Kconfig
>> @@ -20,3 +20,12 @@ config PPC_POWERNV_RTAS
>> default y
>> select PPC_ICS_RTAS
>> select PPC_RTAS
>> +
>> +config POWERNV_IDLE
>> + bool "CPUIdle driver for powernv platform"
>> + depends on CPU_IDLE
>> + depends on PPC_POWERNV
>> + default y
>> + help
>> + Select this option to enable processor idle state management
>> + through cpuidle subsystem.
>> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
>> index 7fe5951..c0e44eb 100644
>> --- a/arch/powerpc/platforms/powernv/Makefile
>> +++ b/arch/powerpc/platforms/powernv/Makefile
>> @@ -4,3 +4,4 @@ obj-y += opal-rtc.o opal-nvram.o
>> obj-$(CONFIG_SMP) += smp.o
>> obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
>> obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
>> +obj-$(CONFIG_POWERNV_IDLE) += processor_idle.o
>> diff --git a/arch/powerpc/platforms/powernv/processor_idle.c b/arch/powerpc/platforms/powernv/processor_idle.c
>> new file mode 100644
>> index 0000000..f43ad91a
>> --- /dev/null
>> +++ b/arch/powerpc/platforms/powernv/processor_idle.c
>> @@ -0,0 +1,239 @@
>> +/*
>> + * processor_idle - idle state cpuidle driver.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/moduleparam.h>
>> +#include <linux/cpuidle.h>
>> +#include <linux/cpu.h>
>> +#include <linux/notifier.h>
>> +
>> +#include <asm/machdep.h>
>> +#include <asm/runlatch.h>
>> +
>> +struct cpuidle_driver powernv_idle_driver = {
>> + .name = "powernv_idle",
>> + .owner = THIS_MODULE,
>> +};
>> +
>> +#define MAX_IDLE_STATE_COUNT 2
>> +
>> +static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
>> +static struct cpuidle_device __percpu *powernv_cpuidle_devices;
>> +static struct cpuidle_state *cpuidle_state_table;
>> +
>> +static int snooze_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + int cpu = dev->cpu;
>> +
>> + local_irq_enable();
>> + set_thread_flag(TIF_POLLING_NRFLAG);
>> +
>> + while ((!need_resched()) && cpu_online(cpu)) {
>> + ppc64_runlatch_off();
>> + HMT_very_low();
>> + }
>
> Why are you using the cpu_online test here ?
Snooze is an idle state in which the cpu executes a loop at
reduced thread priority; the idle cpu comes out of it only when
need_resched is set or when the cpu is offlined. The cpu_online check
is there, just to be safe, so that an offlined cpu stops executing
this loop instead of remaining in the idle state.
>> +
>> + HMT_medium();
>> + clear_thread_flag(TIF_POLLING_NRFLAG);
>> + smp_mb();
>> + return index;
>> +}
>> +
>> +
>> +static int nap_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + ppc64_runlatch_off();
>> + power7_idle();
>> + return index;
>> +}
>> +
>> +/*
>> + * States for dedicated partition case.
>> + */
>> +static struct cpuidle_state powernv_states[MAX_IDLE_STATE_COUNT] = {
>> + { /* Snooze */
>> + .name = "snooze",
>> + .desc = "snooze",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 0,
>> + .target_residency = 0,
>> + .enter = &snooze_loop },
>> + { /* Nap */
>> + .name = "Nap",
>> + .desc = "Nap",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 10,
>> + .target_residency = 100,
>> + .enter = &nap_loop },
>> +};
>> +
>> +static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
>> + unsigned long action, void *hcpu)
>> +{
>> + int hotcpu = (unsigned long)hcpu;
>> + struct cpuidle_device *dev =
>> + per_cpu_ptr(powernv_cpuidle_devices, hotcpu);
>> +
>> + if (dev && cpuidle_get_driver()) {
>> + switch (action) {
>> + case CPU_ONLINE:
>> + case CPU_ONLINE_FROZEN:
>> + cpuidle_pause_and_lock();
>> + cpuidle_enable_device(dev);
>> + cpuidle_resume_and_unlock();
>> + break;
>> +
>> + case CPU_DEAD:
>> + case CPU_DEAD_FROZEN:
>> + cpuidle_pause_and_lock();
>> + cpuidle_disable_device(dev);
>> + cpuidle_resume_and_unlock();
>> + break;
>> +
>> + default:
>> + return NOTIFY_DONE;
>> + }
>> + }
>> + return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block setup_hotplug_notifier = {
>> + .notifier_call = powernv_cpuidle_add_cpu_notifier,
>> +};
>
> This is duplicated code with the pseries cpuidle driver and IMHO it
> should be moved to the cpuidle framework.
Yes, a lot of the code here also exists in the pseries cpuidle driver. I am
re-factoring that aspect so that we can use one back-end driver for both
pseries and powernv. I will post it out soon.
Moving the hotplug handler to cpuidle can be done as a separate feature.
This needs changes in all the other archs that use cpuidle and in
the framework itself.
>
>> +/*
>> + * powernv_cpuidle_driver_init()
>> + */
>> +static int powernv_cpuidle_driver_init(void)
>> +{
>> + int idle_state;
>> + struct cpuidle_driver *drv = &powernv_idle_driver;
>> +
>> + drv->state_count = 0;
>> +
>> + for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
>> +
>> + if (idle_state > max_idle_state)
>> + break;
>> +
>> + /* is the state not enabled? */
>> + if (cpuidle_state_table[idle_state].enter == NULL)
>> + continue;
>> +
>> + drv->states[drv->state_count] = /* structure copy */
>> + cpuidle_state_table[idle_state];
>> +
>> + drv->state_count += 1;
>> + }
>> +
>> + return 0;
>> +}
>
>
> Instead of doing struct copy, why don't you use the state's 'disable'
> field of the driver and then enable the state in the routine ?
Going forward, with a single driver for powernv and pseries, I would
like to have two separate cpuidle state tables, one for each arch. Each
would have its own idle states and corresponding routines. Combining
different arch idle routines into one table and enabling/disabling them
would be quite confusing.
>
>> +/* powernv_idle_devices_uninit(void)
>> + * unregister cpuidle devices and de-allocate memory
>> + */
>> +static void powernv_idle_devices_uninit(void)
>> +{
>> + int i;
>> + struct cpuidle_device *dev;
>> +
>> + for_each_possible_cpu(i) {
>> + dev = per_cpu_ptr(powernv_cpuidle_devices, i);
>> + cpuidle_unregister_device(dev);
>> + }
>> +
>> + free_percpu(powernv_cpuidle_devices);
>> + return;
>> +}
>> +
>> +/* powernv_idle_devices_init()
>> + * allocate, initialize and register cpuidle device
>> + */
>> +static int powernv_idle_devices_init(void)
>> +{
>> + int i;
>> + struct cpuidle_driver *drv = &powernv_idle_driver;
>> + struct cpuidle_device *dev;
>> +
>> + powernv_cpuidle_devices = alloc_percpu(struct cpuidle_device);
>> + if (powernv_cpuidle_devices == NULL)
>> + return -ENOMEM;
>> +
>> + for_each_possible_cpu(i) {
>> + dev = per_cpu_ptr(powernv_cpuidle_devices, i);
>> + dev->state_count = drv->state_count;
>> + dev->cpu = i;
>> + if (cpuidle_register_device(dev)) {
>> + printk(KERN_DEBUG \
>> + "cpuidle_register_device %d failed!\n", i);
>> + return -EIO;
>> + }
>> + }
>> + return 0;
>
>
> There is now the cpuidle_register(struct cpuidle_driver *, cpumask *);
>
> You can get rid of the cpuidle_device struct and this init routine.
Thanks for the pointer. I will look into this.
>> +}
>> +
>> +/*
>> + * powernv_idle_probe()
>> + * Choose state table for shared versus dedicated partition
>> + */
>> +static int powernv_idle_probe(void)
>> +{
>> +
>> + if (cpuidle_disable != IDLE_NO_OVERRIDE)
>> + return -ENODEV;
>> +
>> + cpuidle_state_table = powernv_states;
>> + return 0;
>> +}
>> +
>> +static int __init powernv_processor_idle_init(void)
>> +{
>> + int retval;
>> +
>> + retval = powernv_idle_probe();
>> + if (retval)
>> + return retval;
>> +
>> + powernv_cpuidle_driver_init();
>> + retval = cpuidle_register_driver(&powernv_idle_driver);
>> + if (retval) {
>> + printk(KERN_DEBUG "Registration of powernv driver failed.\n");
>> + return retval;
>> + }
>> +
>> + retval = powernv_idle_devices_init();
>> + if (retval) {
>> + powernv_idle_devices_uninit();
>> + cpuidle_unregister_driver(&powernv_idle_driver);
>> + return retval;
>> + }
>> +
>> + register_cpu_notifier(&setup_hotplug_notifier);
>> + printk(KERN_DEBUG "powernv_idle_driver registered\n");
>> +
>> + return 0;
>> +}
>> +
>> +static void __exit powernv_processor_idle_exit(void)
>> +{
>> +
>> + unregister_cpu_notifier(&setup_hotplug_notifier);
>> + powernv_idle_devices_uninit();
>> + cpuidle_unregister_driver(&powernv_idle_driver);
>> +
>> + return;
>> +}
>> +
>> +module_init(powernv_processor_idle_init);
>> +module_exit(powernv_processor_idle_exit);
>> +
>> +MODULE_AUTHOR("Deepthi Dharwar <[email protected]>");
>> +MODULE_DESCRIPTION("Cpuidle driver for POWERNV");
>> +MODULE_LICENSE("GPL");
>>
>
>
Thanks a lot for your time and review.
Regards,
Deepthi
On 07/29/2013 04:27 PM, Deepthi Dharwar wrote:
> On 07/27/2013 10:59 AM, Daniel Lezcano wrote:
>> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>>> This patch enables idle powernv cpu to hook on to the cpuidle
>>> framework, if available, else call on to default idle platform
>>> code.
>>
>> Why do you need to do that ?
>>
>
> Hi Daniel,
>
> Well, this is needed in case when one does not compile with CPU_IDLE
> config option.
>
> When CPUIDLE is not configured, idle cpus are needed to be running some
> sort of default idle code or loop ( in the worst case) if cpuidle driver
> is not registered or if one decides not to have that config enabled.
Hi Deepthi,
OK, maybe there is some difference from the other platforms, but the
arch_cpu_idle functions defined in the other archs do:
void arch_cpu_idle(void)
{
if (cpuidle_idle_call())
x86_idle();
else
local_irq_enable();
}
or
void arch_cpu_idle(void)
{
if (cpuidle_idle_call())
default_idle();
}
When the cpuidle driver is not compiled or not loaded, cpuidle_idle_call
fails, falling back to the default idle function.
The arch_cpu_idle function is called from the generic code in
kernel/cpu/idle.c.
Is there a particular reason to do it in a different way ?
Thanks
-- Daniel
>>
>>> Signed-off-by: Deepthi Dharwar <[email protected]>
>>> ---
>>> arch/powerpc/platforms/powernv/setup.c | 12 +++++++++++-
>>> 1 file changed, 11 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
>>> index 84438af..97d0951 100644
>>> --- a/arch/powerpc/platforms/powernv/setup.c
>>> +++ b/arch/powerpc/platforms/powernv/setup.c
>>> @@ -25,6 +25,7 @@
>>> #include <linux/of.h>
>>> #include <linux/interrupt.h>
>>> #include <linux/bug.h>
>>> +#include <linux/cpuidle.h>
>>>
>>> #include <asm/machdep.h>
>>> #include <asm/firmware.h>
>>> @@ -196,6 +197,15 @@ static int __init pnv_probe(void)
>>> return 1;
>>> }
>>>
>>> +void powernv_idle(void)
>>> +{
>>> + /* Hook to cpuidle framework if available, else
>>> + * call on default platform idle code
>>> + */
>>> + if (cpuidle_idle_call())
>>> + power7_idle();
>>> +}
>>> +
>>> define_machine(powernv) {
>>> .name = "PowerNV",
>>> .probe = pnv_probe,
>>> @@ -205,7 +215,7 @@ define_machine(powernv) {
>>> .show_cpuinfo = pnv_show_cpuinfo,
>>> .progress = pnv_progress,
>>> .machine_shutdown = pnv_shutdown,
>>> - .power_save = power7_idle,
>>> + .power_save = powernv_idle,
>>> .calibrate_decr = generic_calibrate_decr,
>>> #ifdef CONFIG_KEXEC
>>> .kexec_cpu_down = pnv_kexec_cpu_down,
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>> the body of a message to [email protected]
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>> Please read the FAQ at http://www.tux.org/lkml/
>>>
>>
>>
>
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On 07/29/2013 04:39 PM, Deepthi Dharwar wrote:
> Hi Daniel,
>
> On 07/27/2013 10:57 AM, Daniel Lezcano wrote:
>> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>>> This patch implements a back-end cpuidle driver for
>>> powernv calling power7_nap and snooze idle states.
>>> This can be extended by adding more idle states
>>> in the future to the existing framework.
>>>
>>> Signed-off-by: Deepthi Dharwar <[email protected]>
[ ... ]
>>> +static int snooze_loop(struct cpuidle_device *dev,
>>> + struct cpuidle_driver *drv,
>>> + int index)
>>> +{
>>> + int cpu = dev->cpu;
>>> +
>>> + local_irq_enable();
>>> + set_thread_flag(TIF_POLLING_NRFLAG);
>>> +
>>> + while ((!need_resched()) && cpu_online(cpu)) {
>>> + ppc64_runlatch_off();
>>> + HMT_very_low();
>>> + }
>>
>> Why are you using the cpu_online test here ?
>
> Snooze state is an idle state where cpu executes an infinite loop by
> reducing the priority of the thread and the idle cpu can come out of it
> only if need_resched is set or in case the cpu is offlined. In order to
> continue executing this loop to remain in this idle state, we need the
> check just to be safe.
Yes, but if the cpu is offline you are no longer executing this code, no ?
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
On 07/29/2013 08:14 PM, Daniel Lezcano wrote:
> On 07/29/2013 04:27 PM, Deepthi Dharwar wrote:
>> On 07/27/2013 10:59 AM, Daniel Lezcano wrote:
>>> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>>>> This patch enables idle powernv cpu to hook on to the cpuidle
>>>> framework, if available, else call on to default idle platform
>>>> code.
>>>
>>> Why do you need to do that ?
>>>
>>
>> Hi Daniel,
>>
>> Well, this is needed in case when one does not compile with CPU_IDLE
>> config option.
>>
>> When CPUIDLE is not configured, idle cpus are needed to be running some
>> sort of default idle code or loop ( in the worst case) if cpuidle driver
>> is not registered or if one decides not to have that config enabled.
>
> Hi Deepthi,
>
> ok may be there is some difference with the other platform but the
> arch_cpu_idle function defined in the other archs do:
>
> void arch_cpu_idle(void)
> {
> if (cpuidle_idle_call())
> x86_idle();
> else
> local_irq_enable();
> }
>
> or
>
> void arch_cpu_idle(void)
> {
> if (cpuidle_idle_call())
> default_idle();
> }
>
> When the cpuidle driver is not compiled or not loaded, cpuidle_idle_call
> fails, falling back to the default idle function.
>
> The arch_cpu_idle function is called from the generic code in
> kernel/cpu/idle.c.
>
> Is there a particular reason to do it in a different way ?
On powerpc, we have another parameter, the ppc_md.power_save
knob. In arch_cpu_idle, we check for this knob. This is a kernel command
line parameter too. If this is not set, then the default idle is executed.
ppc_md is a generic powerpc structure, which points to the idle routine that
needs to be executed.
In the arch_cpu_idle code for powerpc:
arch_cpu_idle()
{
	if (ppc_md.power_save) {
		ppc_md.power_save();
	} else {
		/* default idle */
	}
}
Not all flavors of the powerpc arch support cpuidle.
So only for those that support it is power_save set to the cpuidle backend call.
And this check is necessary there.
Regards,
Deepthi
> Thanks
> -- Daniel
>
>>>
>>>> Signed-off-by: Deepthi Dharwar <[email protected]>
>>>> ---
>>>> arch/powerpc/platforms/powernv/setup.c | 12 +++++++++++-
>>>> 1 file changed, 11 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
>>>> index 84438af..97d0951 100644
>>>> --- a/arch/powerpc/platforms/powernv/setup.c
>>>> +++ b/arch/powerpc/platforms/powernv/setup.c
>>>> @@ -25,6 +25,7 @@
>>>> #include <linux/of.h>
>>>> #include <linux/interrupt.h>
>>>> #include <linux/bug.h>
>>>> +#include <linux/cpuidle.h>
>>>>
>>>> #include <asm/machdep.h>
>>>> #include <asm/firmware.h>
>>>> @@ -196,6 +197,15 @@ static int __init pnv_probe(void)
>>>> return 1;
>>>> }
>>>>
>>>> +void powernv_idle(void)
>>>> +{
>>>> + /* Hook to cpuidle framework if available, else
>>>> + * call on default platform idle code
>>>> + */
>>>> + if (cpuidle_idle_call())
>>>> + power7_idle();
>>>> +}
>>>> +
>>>> define_machine(powernv) {
>>>> .name = "PowerNV",
>>>> .probe = pnv_probe,
>>>> @@ -205,7 +215,7 @@ define_machine(powernv) {
>>>> .show_cpuinfo = pnv_show_cpuinfo,
>>>> .progress = pnv_progress,
>>>> .machine_shutdown = pnv_shutdown,
>>>> - .power_save = power7_idle,
>>>> + .power_save = powernv_idle,
>>>> .calibrate_decr = generic_calibrate_decr,
>>>> #ifdef CONFIG_KEXEC
>>>> .kexec_cpu_down = pnv_kexec_cpu_down,
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>>> the body of a message to [email protected]
>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>> Please read the FAQ at http://www.tux.org/lkml/
>>>>
>>>
>>>
>>
>
>
On 07/29/2013 08:23 PM, Daniel Lezcano wrote:
> On 07/29/2013 04:39 PM, Deepthi Dharwar wrote:
>> Hi Daniel,
>>
>> On 07/27/2013 10:57 AM, Daniel Lezcano wrote:
>>> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>>>> This patch implements a back-end cpuidle driver for
>>>> powernv calling power7_nap and snooze idle states.
>>>> This can be extended by adding more idle states
>>>> in the future to the existing framework.
>>>>
>>>> Signed-off-by: Deepthi Dharwar <[email protected]>
>
> [ ... ]
>
>>>> +static int snooze_loop(struct cpuidle_device *dev,
>>>> + struct cpuidle_driver *drv,
>>>> + int index)
>>>> +{
>>>> + int cpu = dev->cpu;
>>>> +
>>>> + local_irq_enable();
>>>> + set_thread_flag(TIF_POLLING_NRFLAG);
>>>> +
>>>> + while ((!need_resched()) && cpu_online(cpu)) {
>>>> + ppc64_runlatch_off();
>>>> + HMT_very_low();
>>>> + }
>>>
>>> Why are you using the cpu_online test here ?
>>
>> Snooze state is an idle state where cpu executes an infinite loop by
>> reducing the priority of the thread and the idle cpu can come out of it
>> only if need_resched is set or in case the cpu is offlined. In order to
>> continue executing this loop to remain in this idle state, we need the
>> check just to be safe.
>
> Yes, but if the cpu is offline you are no longer executing this code, no ?
>
Yes, not needed.
Thanks !
Deepthi
Hi Daniel,
On 07/27/2013 10:57 AM, Daniel Lezcano wrote:
> On 07/23/2013 11:01 AM, Deepthi Dharwar wrote:
>> This patch implements a back-end cpuidle driver for
>> powernv calling power7_nap and snooze idle states.
>> This can be extended by adding more idle states
>> in the future to the existing framework.
>>
>> Signed-off-by: Deepthi Dharwar <[email protected]>
>> ---
>> arch/powerpc/platforms/powernv/Kconfig | 9 +
>> arch/powerpc/platforms/powernv/Makefile | 1
>> arch/powerpc/platforms/powernv/processor_idle.c | 239 +++++++++++++++++++++++
>> 3 files changed, 249 insertions(+)
>> create mode 100644 arch/powerpc/platforms/powernv/processor_idle.c
>>
>> diff --git a/arch/powerpc/platforms/powernv/processor_idle.c b/arch/powerpc/platforms/powernv/processor_idle.c
>> new file mode 100644
>> index 0000000..f43ad91a
>> --- /dev/null
>> +++ b/arch/powerpc/platforms/powernv/processor_idle.c
>> @@ -0,0 +1,239 @@
>> +/*
>> + * processor_idle - idle state cpuidle driver.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/moduleparam.h>
>> +#include <linux/cpuidle.h>
>> +#include <linux/cpu.h>
>> +#include <linux/notifier.h>
>> +
>> +#include <asm/machdep.h>
>> +#include <asm/runlatch.h>
>> +
>> +struct cpuidle_driver powernv_idle_driver = {
>> + .name = "powernv_idle",
>> + .owner = THIS_MODULE,
>> +};
>> +
>> +#define MAX_IDLE_STATE_COUNT 2
>> +
>> +static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
>> +static struct cpuidle_device __percpu *powernv_cpuidle_devices;
>> +static struct cpuidle_state *cpuidle_state_table;
>> +
>> +static int snooze_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + int cpu = dev->cpu;
>> +
>> + local_irq_enable();
>> + set_thread_flag(TIF_POLLING_NRFLAG);
>> +
>> + while ((!need_resched()) && cpu_online(cpu)) {
>> + ppc64_runlatch_off();
>> + HMT_very_low();
>> + }
>
> Why are you using the cpu_online test here ?
>
>> +
>> + HMT_medium();
>> + clear_thread_flag(TIF_POLLING_NRFLAG);
>> + smp_mb();
>> + return index;
>> +}
>> +
>> +
>> +static int nap_loop(struct cpuidle_device *dev,
>> + struct cpuidle_driver *drv,
>> + int index)
>> +{
>> + ppc64_runlatch_off();
>> + power7_idle();
>> + return index;
>> +}
>> +
>> +/*
>> + * States for dedicated partition case.
>> + */
>> +static struct cpuidle_state powernv_states[MAX_IDLE_STATE_COUNT] = {
>> + { /* Snooze */
>> + .name = "snooze",
>> + .desc = "snooze",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 0,
>> + .target_residency = 0,
>> + .enter = &snooze_loop },
>> + { /* Nap */
>> + .name = "Nap",
>> + .desc = "Nap",
>> + .flags = CPUIDLE_FLAG_TIME_VALID,
>> + .exit_latency = 10,
>> + .target_residency = 100,
>> + .enter = &nap_loop },
>> +};
>> +
>> +static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
>> + unsigned long action, void *hcpu)
>> +{
>> + int hotcpu = (unsigned long)hcpu;
>> + struct cpuidle_device *dev =
>> + per_cpu_ptr(powernv_cpuidle_devices, hotcpu);
>> +
>> + if (dev && cpuidle_get_driver()) {
>> + switch (action) {
>> + case CPU_ONLINE:
>> + case CPU_ONLINE_FROZEN:
>> + cpuidle_pause_and_lock();
>> + cpuidle_enable_device(dev);
>> + cpuidle_resume_and_unlock();
>> + break;
>> +
>> + case CPU_DEAD:
>> + case CPU_DEAD_FROZEN:
>> + cpuidle_pause_and_lock();
>> + cpuidle_disable_device(dev);
>> + cpuidle_resume_and_unlock();
>> + break;
>> +
>> + default:
>> + return NOTIFY_DONE;
>> + }
>> + }
>> + return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block setup_hotplug_notifier = {
>> + .notifier_call = powernv_cpuidle_add_cpu_notifier,
>> +};
>
> This is duplicated code with the pseries cpuidle driver and IMHO it
> should be moved to the cpuidle framework.
>
Will this not also require moving the hotplug cpuidle notifiers of the
other architectures into the cpuidle framework?
Regards
Preeti U Murthy