schedule_timeout() wakes the CPU from idle when its timer expires. For
some use cases that is not desirable, so introduce a convenience API,
schedule_timeout_deferrable_interruptible(), which follows the same
pattern but uses a deferrable timer.
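A brief usage sketch (illustrative only, not part of this patch; the
caller function name is made up). The new helper sets TASK_INTERRUPTIBLE
itself, so the caller does not need set_current_state():

	/*
	 * Hypothetical caller that can tolerate its wakeup being deferred
	 * until the CPU is busy again for some other reason.
	 */
	static void example_deferrable_sleep(void)
	{
		signed long remaining;

		/* Sleep ~100ms; an idle CPU is not woken just for this timer. */
		remaining = schedule_timeout_deferrable_interruptible(
					msecs_to_jiffies(100));
		if (remaining > 0)
			pr_debug("woken early by a signal, %ld jiffies left\n",
				 remaining);
	}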
Signed-off-by: Chintan Pandya <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: John Stultz <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Hugh Dickins <[email protected]>
---
Changes:
V2-->V3:
- Big comment moved from static function to exported function
- Using __setup_timer_on_stack for better readability
V1-->V2:
- This patch was newly introduced in v2
include/linux/sched.h | 2 ++
kernel/time/timer.c | 73 +++++++++++++++++++++++++++++++--------------------
2 files changed, 47 insertions(+), 28 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89f531e..10b154e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -377,6 +377,8 @@ extern int in_sched_functions(unsigned long addr);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
extern signed long schedule_timeout(signed long timeout);
extern signed long schedule_timeout_interruptible(signed long timeout);
+extern signed long
+schedule_timeout_deferrable_interruptible(signed long timeout);
extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index aca5dfe..f4c4082 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1431,33 +1431,8 @@ static void process_timeout(unsigned long __data)
wake_up_process((struct task_struct *)__data);
}
-/**
- * schedule_timeout - sleep until timeout
- * @timeout: timeout value in jiffies
- *
- * Make the current task sleep until @timeout jiffies have
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns. The routine will return 0
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * in jiffies will be returned, or 0 if the timer expired in time
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
- * the CPU away without a bound on the timeout. In this case the return
- * value will be %MAX_SCHEDULE_TIMEOUT.
- *
- * In all cases the return value is guaranteed to be non-negative.
- */
-signed long __sched schedule_timeout(signed long timeout)
+static signed long
+__sched __schedule_timeout(signed long timeout, unsigned long flag)
{
struct timer_list timer;
unsigned long expire;
@@ -1493,7 +1468,9 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
- setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
+ __setup_timer_on_stack(&timer, process_timeout, (unsigned long)current,
+ flag);
+
__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
del_singleshot_timer_sync(&timer);
@@ -1506,12 +1483,52 @@ signed long __sched schedule_timeout(signed long timeout)
out:
return timeout < 0 ? 0 : timeout;
}
+
+/**
+ * schedule_timeout - sleep until timeout
+ * @timeout: timeout value in jiffies
+ *
+ * Make the current task sleep until @timeout jiffies have
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
+ * pass before the routine returns. The routine will return 0
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task. In this case the remaining time
+ * in jiffies will be returned, or 0 if the timer expired in time
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
+ * the CPU away without a bound on the timeout. In this case the return
+ * value will be %MAX_SCHEDULE_TIMEOUT.
+ *
+ * In all cases the return value is guaranteed to be non-negative.
+ */
+signed long __sched schedule_timeout(signed long timeout)
+{
+ return __schedule_timeout(timeout, 0);
+}
EXPORT_SYMBOL(schedule_timeout);
/*
* We can use __set_current_state() here because schedule_timeout() calls
* schedule() unconditionally.
*/
+
+signed long
+__sched schedule_timeout_deferrable_interruptible(signed long timeout)
+{
+ __set_current_state(TASK_INTERRUPTIBLE);
+ return __schedule_timeout(timeout, TIMER_DEFERRABLE);
+}
+EXPORT_SYMBOL(schedule_timeout_deferrable_interruptible);
+
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
__set_current_state(TASK_INTERRUPTIBLE);
--
Chintan Pandya
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
member of the Code Aurora Forum, hosted by The Linux Foundation
The KSM thread that scans pages is scheduled with a fixed timeout. That
wakes the CPU from idle and hence may affect power consumption. Provide
optional support for using a deferrable timer, which suits low-power
use cases.
On our setup we typically observed about 10% lower power consumption in
use cases where the CPU enters power collapse frequently, for example
audio playback, during which the CPU mostly remains idle.
To enable deferrable timers:
$ echo 1 > /sys/kernel/mm/ksm/deferrable_timer
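For illustration only (not part of this patch), the same knob can be
toggled from a C program; the helper below is a hypothetical sketch:

	#include <fcntl.h>
	#include <unistd.h>

	/*
	 * Hypothetical userspace helper: write "0" or "1" to the KSM
	 * deferrable_timer sysfs knob. Returns 0 on success, -1 on error.
	 */
	static int ksm_set_deferrable_timer(int enable)
	{
		int fd = open("/sys/kernel/mm/ksm/deferrable_timer", O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, enable ? "1" : "0", 1) != 1) {
			close(fd);
			return -1;
		}
		close(fd);
		return 0;
	}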
Signed-off-by: Chintan Pandya <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: John Stultz <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Hugh Dickins <[email protected]>
---
Changes:
V2-->V3:
- Handled error case properly
- Corrected indentation in Documentation
- Fixed build failure
- Removed left over process_timeout()
V1-->V2:
- allowing only valid values to be updated as use_deferrable_timer
- using only 'deferrable' and not 'deferred'
- moved out schedule_timeout code for deferrable timer into timer.c
Documentation/vm/ksm.txt | 7 +++++++
mm/ksm.c | 36 ++++++++++++++++++++++++++++++++++--
2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index f34a8ee..9735c87 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -87,6 +87,13 @@ pages_sharing - how many more sites are sharing them i.e. how much saved
pages_unshared - how many pages unique but repeatedly checked for merging
pages_volatile - how many pages changing too fast to be placed in a tree
full_scans - how many times all mergeable areas have been scanned
+deferrable_timer - whether to use deferrable timers or not
+                   e.g. "echo 1 > /sys/kernel/mm/ksm/deferrable_timer"
+                   Default: 0 (deferrable timers are not used. Users may
+                   want to set the deferrable_timer option if they do not
+                   want the ksm thread to wake up the CPU to carry out ksm
+                   activities, thus gaining on battery while compromising
+                   slightly on the memory that could have been saved.)
A high ratio of pages_sharing to pages_shared indicates good sharing, but
a high ratio of pages_unshared to pages_sharing indicates wasted effort.
diff --git a/mm/ksm.c b/mm/ksm.c
index fb75902..434a50a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -223,6 +223,9 @@ static unsigned int ksm_thread_pages_to_scan = 100;
/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;
+/* Boolean to indicate whether to use deferrable timer or not */
+static bool use_deferrable_timer;
+
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
@@ -1725,8 +1728,13 @@ static int ksm_scan_thread(void *nothing)
try_to_freeze();
if (ksmd_should_run()) {
- schedule_timeout_interruptible(
- msecs_to_jiffies(ksm_thread_sleep_millisecs));
+ signed long to;
+
+ to = msecs_to_jiffies(ksm_thread_sleep_millisecs);
+ if (use_deferrable_timer)
+ schedule_timeout_deferrable_interruptible(to);
+ else
+ schedule_timeout_interruptible(to);
} else {
wait_event_freezable(ksm_thread_wait,
ksmd_should_run() || kthread_should_stop());
@@ -2175,6 +2183,29 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
}
KSM_ATTR(run);
+static ssize_t deferrable_timer_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return snprintf(buf, 8, "%d\n", use_deferrable_timer);
+}
+
+static ssize_t deferrable_timer_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long enable;
+ int err;
+
+ err = kstrtoul(buf, 10, &enable);
+ if (err < 0)
+ return err;
+ if (enable > 1)
+ return -EINVAL;
+ use_deferrable_timer = enable;
+ return count;
+}
+KSM_ATTR(deferrable_timer);
+
#ifdef CONFIG_NUMA
static ssize_t merge_across_nodes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -2287,6 +2318,7 @@ static struct attribute *ksm_attrs[] = {
&pages_unshared_attr.attr,
&pages_volatile_attr.attr,
&full_scans_attr.attr,
+ &deferrable_timer_attr.attr,
#ifdef CONFIG_NUMA
&merge_across_nodes_attr.attr,
#endif
--
Chintan Pandya
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
member of the Code Aurora Forum, hosted by The Linux Foundation